easyvvuq.analysis.sc_analysis

ANALYSIS CLASS FOR THE SC SAMPLER
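A minimal usage sketch (an assumption about the surrounding campaign workflow, not part of this module): given an EasyVVUQ campaign whose runs were produced by an SCSampler and collated into the campaign database, the analysis element defined below is typically applied as follows. The campaign object my_campaign, the sampler variable sc_sampler and the output column "f" are placeholders.

    import easyvvuq as uq

    analysis = uq.analysis.SCAnalysis(sampler=sc_sampler, qoi_cols=["f"])
    my_campaign.apply_analysis(analysis)
    results = my_campaign.get_last_analysis()
    print(results.describe("f", "mean"), results.describe("f", "std"))
    print(results.sobols_first("f"))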

   1"""
   2ANALYSIS CLASS FOR THE SC SAMPLER
   3"""
   4
   5import numpy as np
   6import chaospy as cp
   7from itertools import product, chain, combinations
   8import warnings
   9import pickle
  10import copy
  11from easyvvuq import OutputType
  12from .base import BaseAnalysisElement
  13from .results import AnalysisResults
  14import logging
  15import pandas as pd
  16
  17__author__ = "Wouter Edeling"
  18__copyright__ = """
  19
  20    Copyright 2018 Robin A. Richardson, David W. Wright
  21
  22    This file is part of EasyVVUQ
  23
  24    EasyVVUQ is free software: you can redistribute it and/or modify
  25    it under the terms of the Lesser GNU General Public License as published by
  26    the Free Software Foundation, either version 3 of the License, or
  27    (at your option) any later version.
  28
  29    EasyVVUQ is distributed in the hope that it will be useful,
  30    but WITHOUT ANY WARRANTY; without even the implied warranty of
  31    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  32    Lesser GNU General Public License for more details.
  33
  34    You should have received a copy of the Lesser GNU General Public License
  35    along with this program.  If not, see <https://www.gnu.org/licenses/>.
  36
  37"""
  38__license__ = "LGPL"
  39
  40
  41class SCAnalysisResults(AnalysisResults):
  42    def _get_sobols_first(self, qoi, input_):
  43        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first'])
  44        result = raw_dict[AnalysisResults._to_tuple(qoi)][input_]
  45        return np.array(result)
  46        # try:
  47        #     # return np.array([float(result)])
  48        #     return np.array([result[0]])
  49        # except TypeError:
  50        #     return np.array(result)
  51
  52    def supported_stats(self):
  53        """Types of statistics supported by the describe method.
  54
  55        Returns
  56        -------
  57        list of str
  58        """
  59        return ['mean', 'var', 'std']
  60
  61    def _describe(self, qoi, statistic):
  62        if statistic in self.supported_stats():
  63            return self.raw_data['statistical_moments'][qoi][statistic]
  64        else:
  65            raise NotImplementedError
  66
  67    def surrogate(self):
  68        """Return an SC surrogate model.
  69
  70        Returns
  71        -------
  72        A function that takes a dictionary of parameter - value pairs and returns
  73        a dictionary with the results (same output as decoder).
  74        """
  75        def surrogate_fn(inputs):
  76            def swap(x):
  77                if len(x) > 1:
  78                    return list(x)
  79                else:
  80                    return x[0]
  81            values = np.squeeze(np.array([inputs[key] for key in self.inputs])).T
  82            results = dict([(qoi, swap(self.surrogate_(qoi, values))) for qoi in self.qois])
  83            return results
  84        return surrogate_fn
  85
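A hypothetical example of evaluating the surrogate returned by SCAnalysisResults.surrogate(); the input names "x1" and "x2", the qoi "f" and the results object are placeholders from the sketch above.

    surrogate = results.surrogate()
    prediction = surrogate({"x1": 0.3, "x2": 1.5})
    y = prediction["f"]  # interpolated value(s) of "f" at this parameter setting
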
  86
  87class SCAnalysis(BaseAnalysisElement):
  88
  89    def __init__(self, sampler=None, qoi_cols=None):
  90        """
  91        Parameters
  92        ----------
  93        sampler : SCSampler
  94            Sampler used to initiate the SC analysis
  95        qoi_cols : list or None
  96            Column names for quantities of interest (for which analysis is
  97            performed).
  98        """
  99
 100        if sampler is None:
 101            msg = 'SC analysis requires a paired sampler to be passed'
 102            raise RuntimeError(msg)
 103
 104        if qoi_cols is None:
 105            raise RuntimeError("Analysis element requires a list of "
 106                               "quantities of interest (qoi)")
 107
 108        self.qoi_cols = qoi_cols
 109        self.output_type = OutputType.SUMMARY
 110        self.sampler = sampler
 111        self.dimension_adaptive = sampler.dimension_adaptive
 112        if self.dimension_adaptive:
 113            self.adaptation_errors = []
 114            self.mean_history = []
 115            self.std_history = []
 116        self.sparse = sampler.sparse
 117        self.pce_coefs = {}
 118        self.N_qoi = {}
 119        for qoi_k in qoi_cols:
 120            self.pce_coefs[qoi_k] = {}
 121            self.N_qoi[qoi_k] = 0
 122
 123    def element_name(self):
 124        """Name for this element for logging purposes"""
 125        return "SC_Analysis"
 126
 127    def element_version(self):
 128        """Version of this element for logging purposes"""
 129        return "0.5"
 130
 131    def save_state(self, filename):
 132        """Saves the complete state of the analysis object to a pickle file,
 133        except the sampler object (self.sampler).
 134
 135        Parameters
 136        ----------
 137        filename : string
 138            name of the file to write the state to
 139        """
 140        logging.debug("Saving analysis state to %s" % filename)
 141        # make a copy of the state, and do not store the sampler as well
 142        state = copy.copy(self.__dict__)
 143        del state['sampler']
 144        file = open(filename, 'wb')
 145        pickle.dump(state, file)
 146        file.close()
 147
 148    def load_state(self, filename):
 149        """Loads the complete state of the analysis object from a
 150        pickle file, stored using save_state.
 151
 152        Parameters
 153        ----------
 154        filename : string
 155            name of the file to load
 156        """
 157        logging.debug("Loading analysis state from %s" % filename)
 158        file = open(filename, 'rb')
 159        state = pickle.load(file)
 160        for key in state.keys():
 161            self.__dict__[key] = state[key]
 162        file.close()
 163
 164    def analyse(self, data_frame=None, compute_moments=True, compute_Sobols=True):
 165        """Perform SC analysis on input `data_frame`.
 166
 167        Parameters
 168        ----------
 169        data_frame : pandas.DataFrame
 170            Input data for analysis.
 171
 172        Returns
 173        -------
 174        dict
 175            Results dictionary with sub-dicts with keys:
 176            ['statistical_moments', 'sobols_first', 'sobols'].
 177            Each dict has an entry for each item in `qoi_cols`.
 178        """
 179
 180        if data_frame is None:
 181            raise RuntimeError("Analysis element needs a data frame to "
 182                               "analyse")
 183        elif isinstance(data_frame, pd.DataFrame) and data_frame.empty:
 184            raise RuntimeError(
 185                "No data in data frame passed to analyse element")
 186
 187        # the number of uncertain parameters
 188        self.N = self.sampler.N
 189        # tensor grid
 190        self.xi_d = self.sampler.xi_d
 191        # the maximum level (quad order) of the (sparse) grid
 192        self.L = self.sampler.L
 193
 194        # if L < L_min: quadratures and interpolations are zero
 195        # For full tensor grid: there is only one level: L_min = L
 196        if not self.sparse:
 197            self.L_min = self.L
 198            self.l_norm = np.array([self.sampler.polynomial_order])
 199            self.l_norm_min = self.l_norm
 200        # For sparse grid: one or more levels
 201        else:
 202            self.L_min = 1
 203            # multi indices (stored in l_norm) for isotropic sparse grid or
 204            # dimension-adaptive grid before the 1st refinement.
 205            # If dimension_adaptive and nadaptations > 0: l_norm
 206            # is computed in self.adaptation_metric
 207            if not self.dimension_adaptive or self.sampler.nadaptations == 0:
 208                # the maximum level (quad order) of the (sparse) grid
 209                self.l_norm = self.sampler.compute_sparse_multi_idx(self.L, self.N)
 210
 211            self.l_norm_min = np.ones(self.N, dtype=int)
 212
 213        # compute generalized combination coefficients
 214        self.comb_coef = self.compute_comb_coef()
 215
 216        # 1d weights and points per level
 217        self.xi_1d = self.sampler.xi_1d
 218        # self.wi_1d = self.compute_SC_weights(rule=self.sampler.quad_rule)
 219        self.wi_1d = self.sampler.wi_1d
 220
 221        # Extract output values for each quantity of interest from Dataframe
 222        logging.debug('Loading samples...')
 223        qoi_cols = self.qoi_cols
 224        samples = {k: [] for k in qoi_cols}
 225        for run_id in data_frame[('run_id', 0)].unique():
 226            for k in qoi_cols:
 227                values = data_frame.loc[data_frame[('run_id', 0)] == run_id][k].values
 228                samples[k].append(values.flatten())
 229        self.samples = samples
 230        logging.debug('done')
 231
 232        # assume that self.l_norm has changed, and that the interpolation
 233        # must be initialised, see sc_expansion subroutine
 234        self.init_interpolation = True
 235
 236        # the pce coefs must be computed for every qoi
 237        if self.sparse:
 238            for qoi_k in qoi_cols:
 239                self.pce_coefs[qoi_k] = self.SC2PCE(samples[qoi_k], qoi_k)
 240                # size of one code sample
 241                self.N_qoi[qoi_k] = self.samples[qoi_k][0].size
 242
 243        # Compute descriptive statistics for each quantity of interest
 244        results = {'statistical_moments': {},
 245                   'sobols_first': {k: {} for k in self.qoi_cols},
 246                   'sobols': {k: {} for k in self.qoi_cols}}
 247
 248        if compute_moments:
 249            for qoi_k in qoi_cols:
 250                if not self.sparse:
 251                    mean_k, var_k = self.get_moments(qoi_k)
 252                    std_k = np.sqrt(var_k)
 253                else:
 254                    self.pce_coefs[qoi_k] = self.SC2PCE(self.samples[qoi_k], qoi_k)
 255                    mean_k, var_k, _ = self.get_pce_stats(self.l_norm, self.pce_coefs[qoi_k],
 256                                                          self.comb_coef)
 257                    std_k = np.sqrt(var_k)
 258
 259                # compute statistical moments
 260                results['statistical_moments'][qoi_k] = {'mean': mean_k,
 261                                                         'var': var_k,
 262                                                         'std': std_k}
 263
 264        if compute_Sobols:
 265            for qoi_k in qoi_cols:
 266                if not self.sparse:
 267                    results['sobols'][qoi_k] = self.get_sobol_indices(qoi_k, 'first_order')
 268                else:
 269                    _, _, _, results['sobols'][qoi_k] = self.get_pce_sobol_indices(
 270                        qoi_k, 'first_order')
 271
 272                for idx, param_name in enumerate(self.sampler.vary.get_keys()):
 273                    results['sobols_first'][qoi_k][param_name] = \
 274                        results['sobols'][qoi_k][(idx,)]
 275
 276        results = SCAnalysisResults(raw_data=results, samples=data_frame,
 277                                    qois=qoi_cols, inputs=list(self.sampler.vary.get_keys()))
 278        results.surrogate_ = self.surrogate
 279        return results
 280
 281    def compute_comb_coef(self, **kwargs):
 282        """Compute general combination coefficients. These are the coefficients
 283        multiplying the tensor products associated with each multi index l;
 284        see page 12 of Gerstner & Griebel, "Numerical integration using sparse grids".
 285        """
 286
 287        if 'l_norm' in kwargs:
 288            l_norm = kwargs['l_norm']
 289        else:
 290            l_norm = self.l_norm
 291
 292        comb_coef = {}
 293        logging.debug('Computing combination coefficients...')
 294        for k in l_norm:
 295            coef = 0.0
 296            # for every k, subtract all multi indices
 297            for l in l_norm:
 298                z = l - k
 299            # if the result contains only 0's and 1's, then z is the
 300                # vector that can be formed from a tensor product of unit vectors
 301                # for which k+z is in self.l_norm
 302                if np.array_equal(z, z.astype(bool)):
 303                    coef += (-1)**(np.sum(z))
 304            comb_coef[tuple(k)] = coef
 305        logging.debug('done')
 306        return comb_coef
 307
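The combination coefficients can be illustrated with a small standalone sketch that mirrors the loop in compute_comb_coef above, here for a hypothetical 2D index set.

    import numpy as np

    l_norm = np.array([[1, 1], [2, 1], [1, 2]])
    comb_coef = {}
    for k in l_norm:
        coef = 0.0
        for l in l_norm:
            z = l - k
            # only forward neighbours l = k + z with z in {0, 1}^N contribute
            if np.array_equal(z, z.astype(bool)):
                coef += (-1) ** np.sum(z)
        comb_coef[tuple(k)] = coef
    # comb_coef == {(1, 1): -1.0, (2, 1): 1.0, (1, 2): 1.0}; the coefficients sum to 1
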
 308    def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
 309                        method='surplus', **kwargs):
 310        """Compute the adaptation metric and decide which of the admissible
 311        level indices to include in next iteration of the sparse grid. The
 312        adaptation metric is based on the hierarchical surplus, defined as the
 313        difference between the new code values of the admissible level indices,
 314        and the SC surrogate of the previous iteration. Alternatively, it can be
 315        based on the difference between the output variance of the current level,
 316        and the variance computed with one extra admissible index.
 317
 318        This subroutine must be called AFTER the code is evaluated at
 319        the new points, but BEFORE the analysis is performed.
 320
 321        Parameters
 322        ----------
 323        qoi : string
 324            the name of the quantity of interest which is used
 325            to base the adaptation metric on.
 326        data_frame : pandas.DataFrame
 327        store_stats_history : bool
 328            store the mean and variance at each refinement in self.mean_history
 329            and self.std_history. Used for checking convergence in the statistics
 330            over the refinement iterations
 331        method : string
 332            name of the refinement error, default is 'surplus'. In this case the
 333            error is based on the hierarchical surplus, which is an interpolation
 334            based error. Another possibility is 'var',
 335            in which case the error is based on the difference in the
 336            variance between the current estimate and the estimate obtained
 337            when a particular candidate direction is added.
 338        """
 339        logging.debug('Refining sampling plan...')
 340        # load the code samples
 341        samples = []
 342        if isinstance(data_frame, pd.DataFrame):
 343            for run_id in data_frame[('run_id', 0)].unique():
 344                values = data_frame.loc[data_frame[('run_id', 0)] == run_id][qoi].values
 345                samples.append(values.flatten())
 346
 347        if method == 'var':
 348            all_idx = np.concatenate((self.l_norm, self.sampler.admissible_idx))
 349            self.xi_1d = self.sampler.xi_1d
 350            self.wi_1d = self.sampler.wi_1d
 351            self.pce_coefs[qoi] = self.SC2PCE(samples, qoi, verbose=True, l_norm=all_idx,
 352                                              xi_d=self.sampler.xi_d)
 353            _, var_l, _ = self.get_pce_stats(self.l_norm, self.pce_coefs[qoi], self.comb_coef)
 354
 355        # the currently accepted grid points
 356        xi_d_accepted = self.sampler.generate_grid(self.l_norm)
 357
 358        # compute the hierarchical surplus based error for every admissible l
 359        error = {}
 360        for l in self.sampler.admissible_idx:
 361            error[tuple(l)] = []
 362            # compute the error based on the hierarchical surplus (interpolation based)
 363            if method == 'surplus':
 364                # collocation points of current level index l
 365                X_l = [self.sampler.xi_1d[n][l[n]] for n in range(self.N)]
 366                X_l = np.array(list(product(*X_l)))
 367                # only consider new points, subtract the accepted points
 368                X_l = setdiff2d(X_l, xi_d_accepted)
 369                for xi in X_l:
 370                    # find the location of the current xi in the global grid
 371                    idx = np.where((xi == self.sampler.xi_d).all(axis=1))[0][0]
 372                    # hierarchical surplus error at xi
 373                    hier_surplus = samples[idx] - self.surrogate(qoi, xi)
 374                    if 'index' in kwargs:
 375                        hier_surplus = hier_surplus[kwargs['index']]
 376                        error[tuple(l)].append(np.abs(hier_surplus))
 377                    else:
 378                        error[tuple(l)].append(np.linalg.norm(hier_surplus, np.inf))
 379                # compute mean error over all points in X_l
 380                error[tuple(l)] = np.mean(error[tuple(l)])
 381            # compute the error based on quadrature of the variance
 382            elif method == 'var':
 383                # create a candidate set of multi indices by adding the current
 384                # admissible index to l_norm
 385                candidate_l_norm = np.concatenate((self.l_norm, l.reshape([1, self.N])))
 386                # now we must recompute the combination coefficients
 387                c_l = self.compute_comb_coef(l_norm=candidate_l_norm)
 388                _, var_candidate_l, _ = self.get_pce_stats(
 389                    candidate_l_norm, self.pce_coefs[qoi], c_l)
 390                # error in var
 391                error[tuple(l)] = np.linalg.norm(var_candidate_l - var_l, np.inf)
 392            else:
 393                logging.debug('Specified refinement method %s not recognized' % method)
 394                logging.debug('Accepted are surplus or var')
 395                import sys
 396                sys.exit()
 397
 398        for key in error.keys():
 399            # logging.debug("Surplus error when l = %s is %s" % (key, error[key]))
 400            logging.debug("Refinement error for l = %s is %s" % (key, error[key]))
 401        # find the admissible index with the largest error
 402        l_star = np.array(max(error, key=error.get)).reshape([1, self.N])
 403        # logging.debug('Selecting %s for refinement.' % l_star)
 404        logging.debug('Selecting %s for refinement.' % l_star)
 405        # add max error to list
 406        self.adaptation_errors.append(max(error.values()))
 407
 408        # add l_star to the current accepted level indices
 409        self.l_norm = np.concatenate((self.l_norm, l_star))
 410        # if someone executes this function twice for some reason,
 411        # remove the duplicate l_star entry. Keep order unaltered
 412        idx = np.unique(self.l_norm, axis=0, return_index=True)[1]
 413        self.l_norm = np.array([self.l_norm[i] for i in sorted(idx)])
 414
 415        # perform the analysis step, but do not compute moments and Sobol indices
 416        self.analyse(data_frame, compute_moments=False, compute_Sobols=False)
 417
 418        # if True, store the mean and variance at each iteration of the adaptive
 419        # algorithm
 420        if store_stats_history:
 421            # mean_f, var_f = self.get_moments(qoi)
 422            logging.debug('Storing moments of iteration %d' % self.sampler.nadaptations)
 423            pce_coefs = self.SC2PCE(samples, qoi, verbose=True)
 424            mean_f, var_f, _ = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
 425            self.mean_history.append(mean_f)
 426            self.std_history.append(var_f)
 427            logging.debug('done')
 428
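For context, a sketch of the dimension-adaptive loop in which adapt_dimension is typically called. The campaign, sampler and analysis objects, the helper run_campaign that executes the new runs, the iteration count and the qoi name "f" are all placeholders; this describes the surrounding workflow rather than this class itself.

    for _ in range(number_of_refinements):
        # let the sampler propose admissible forward neighbours of the accepted set
        sc_sampler.look_ahead(analysis.l_norm)
        # evaluate the code at the newly proposed collocation points
        run_campaign(my_campaign)
        data_frame = my_campaign.get_collation_result()
        # accept the admissible index with the largest refinement error
        analysis.adapt_dimension("f", data_frame)
    # final analysis with the refined index set
    my_campaign.apply_analysis(analysis)
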
 429    def merge_accepted_and_admissible(self, level=0, **kwargs):
 430        """In the case of the dimension-adaptive sampler, there are 2 sets of
 431        quadrature multi indices. There are the accepted indices that are actually
 432        used in the analysis, and the admissible indices, of which some might
 433        move to the accepted set in subsequent iterations. This subroutine merges
 434        the two sets of multi indices by moving all admissible to the set of
 435        accepted indices.
 436        Do this at the end, when no more refinements will be executed. The
 437        samples related to the admissible indices are already computed, although
 438        not used in the analysis. By executing this subroutine at the very end, all
 439        computed samples are used during the final postprocessing stage. Execute
 440        campaign.apply_analysis to let the new set of indices take effect.
 441        If further refinements are executed after all via sampler.look_ahead, the
 442        number of new admissible samples to be computed can be very high,
 443        especially in high dimensions. It is possible to undo the merge via
 444        analysis.undo_merge before new refinements are made. Execute
 445        campaign.apply_analysis again to let the old set of indices take effect.
 446        """
 447
 448        if 'include' in kwargs:
 449            include = kwargs['include']
 450        else:
 451            include = np.arange(self.N)
 452
 453        if self.sampler.dimension_adaptive:
 454            logging.debug('Moving admissible indices to the accepted set...')
 455            # make a backup of l_norm, such that undo_merge can revert back
 456            self.l_norm_backup = np.copy(self.l_norm)
 457            # merge admissible and accepted multi indices
 458            if level == 0:
 459                merged_l = np.concatenate((self.l_norm, self.sampler.admissible_idx))
 460            else:
 461                admissible_idx = []
 462                count = 0
 463                for l in self.sampler.admissible_idx:
 464                    L = np.sum(l) - self.N + 1
 465                    tmp = np.where(l == L)[0]
 466                    if L <= level and np.in1d(tmp, include)[0]:
 467                        admissible_idx.append(l)
 468                        count += 1
 469                admissible_idx = np.array(admissible_idx).reshape([count, self.N])
 470                merged_l = np.concatenate((self.l_norm, admissible_idx))
 471            # make sure final result contains only unique indices and store
 472            # results in l_norm
 473            idx = np.unique(merged_l, axis=0, return_index=True)[1]
 474            # return np.array([merged_l[i] for i in sorted(idx)])
 475            self.l_norm = np.array([merged_l[i] for i in sorted(idx)])
 476            logging.debug('done')
 477
 478    def undo_merge(self):
 479        """This reverses the effect of the merge_accepted_and_admissble subroutine.
 480        Execute if further refinement are required after all.
 481        """
 482        if self.sampler.dimension_adaptive:
 483            self.l_norm = self.l_norm_backup
 484            logging.debug('Restored old multi indices.')
 485
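A short sketch of the intended call order, using the same placeholder campaign and analysis objects as above.

    # at the end of the adaptive campaign: also use the admissible samples
    analysis.merge_accepted_and_admissible()
    my_campaign.apply_analysis(analysis)

    # if further refinement turns out to be needed after all
    analysis.undo_merge()
    my_campaign.apply_analysis(analysis)
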
 486    def get_adaptation_errors(self):
 487        """Returns self.adaptation_errors
 488        """
 489        return self.adaptation_errors
 490
 491    def plot_stat_convergence(self):
 492        """Plots the convergence of the statistical mean and std dev over the different
 493        refinements in a dimension-adaptive setting. Specifically the inf norm
 494        of the difference between the stats of iteration i and iteration i-1
 495        is plotted.
 496        """
 497        if not self.dimension_adaptive:
 498            logging.debug('Only works for the dimension adaptive sampler.')
 499            return
 500
 501        K = len(self.mean_history)
 502        if K < 2:
 503            logging.debug('Means from at least two refinements are required')
 504            return
 505        else:
 506            differ_mean = np.zeros(K - 1)
 507            differ_std = np.zeros(K - 1)
 508            for i in range(1, K):
 509                differ_mean[i - 1] = np.linalg.norm(self.mean_history[i] -
 510                                                    self.mean_history[i - 1], np.inf)
 511                # make relative
 512                differ_mean[i - 1] = differ_mean[i - 1] / np.linalg.norm(self.mean_history[i - 1],
 513                                                                         np.inf)
 514
 515                differ_std[i - 1] = np.linalg.norm(self.std_history[i] -
 516                                                   self.std_history[i - 1], np.inf)
 517                # make relative
 518                differ_std[i - 1] = differ_std[i - 1] / np.linalg.norm(self.std_history[i - 1],
 519                                                                       np.inf)
 520
 521        import matplotlib.pyplot as plt
 522        fig = plt.figure('stat_conv')
 523        ax1 = fig.add_subplot(111, title='moment convergence')
 524        ax1.set_xlabel('iteration', fontsize=12)
 525        # ax1.set_ylabel(r'$ ||\mathrm{mean}_i - \mathrm{mean}_{i - 1}||_\infty$',
 526        # color='r', fontsize=12)
 527        ax1.set_ylabel(r'relative error mean', color='r', fontsize=12)
 528        ax1.plot(range(2, K + 1), differ_mean, color='r', marker='+')
 529        ax1.tick_params(axis='y', labelcolor='r')
 530
 531        ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
 532
 533        # ax2.set_ylabel(r'$ ||\mathrm{var}_i - \mathrm{var}_{i - 1}||_\infty$',
 534        # color='b', fontsize=12)
 535        ax2.set_ylabel(r'relative error variance', fontsize=12, color='b')
 536        ax2.plot(range(2, K + 1), differ_std, color='b', marker='*')
 537        ax2.tick_params(axis='y', labelcolor='b')
 538
 539        plt.tight_layout()
 540        plt.show()
 541
 542    def surrogate(self, qoi, x, L=None):
 543        """Use sc_expansion UQP as a surrogate
 544
 545        Parameters
 546        ----------
 547        qoi : str
 548            name of the qoi
 549        x : array
 550            location at which to evaluate the surrogate
 551        L : int
 552            level of the (sparse) grid, default = self.L (currently unused)
 553
 554        Returns
 555        -------
 556        the interpolated value of qoi at x (float, (N_qoi,))
 557        """
 558
 559        return self.sc_expansion(self.samples[qoi], x=x)
 560
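A hypothetical call, assuming two uncertain inputs and a qoi named "f"; x must lie inside the sampled parameter domain.

    import numpy as np

    x = np.array([0.15, 0.3])
    f_at_x = analysis.surrogate("f", x)  # interpolated qoi, shape (N_qoi,)
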
 561    def quadrature(self, qoi, samples=None):
 562        """Computes a (Smolyak) quadrature
 563
 564        Parameters
 565        ----------
 566        qoi : str
 567            name of the qoi
 568
 569        samples: array
 570            compute the mean by setting samples = self.samples.
 571            To compute the variance, set samples = (self.samples - mean)**2
 572
 573        Returns
 574        -------
 575        the quadrature of qoi
 576        """
 577        if samples is None:
 578            samples = self.samples[qoi]
 579
 580        return self.combination_technique(qoi, samples)
 581
 582    def combination_technique(self, qoi, samples=None, **kwargs):
 583        """Efficient quadrature formulation for (sparse) grids. See:
 584
 585            Gerstner, Griebel, "Numerical integration using sparse grids"
 586            Uses the general combination technique (page 12).
 587
 588        Parameters
 589        ----------
 590        qoi : str
 591            name of the qoi
 592
 593        samples : array
 594            compute the mean by setting samples = self.samples.
 595            To compute the variance, set samples = (self.samples - mean)**2
 596        """
 597
 598        if samples is None:
 599            samples = self.samples[qoi]
 600
 601        # In the case of quadrature-based refinement, we need to specify
 602        # l_norm, comb_coef and xi_d other than the current default values
 603        if 'l_norm' in kwargs:
 604            l_norm = kwargs['l_norm']
 605        else:
 606            l_norm = self.l_norm
 607
 608        if 'comb_coef' in kwargs:
 609            comb_coef = kwargs['comb_coef']
 610        else:
 611            comb_coef = self.comb_coef
 612
 613        if 'xi_d' in kwargs:
 614            xi_d = kwargs['xi_d']
 615        else:
 616            xi_d = self.xi_d
 617
 618        # quadrature Q
 619        Q = 0.0
 620
 621        # loop over l
 622        for l in l_norm:
 623            # compute the tensor product of parameter and weight values
 624            X_k = [self.xi_1d[n][l[n]] for n in range(self.N)]
 625            W_k = [self.wi_1d[n][l[n]] for n in range(self.N)]
 626
 627            X_k = np.array(list(product(*X_k)))
 628            W_k = np.array(list(product(*W_k)))
 629            W_k = np.prod(W_k, axis=1)
 630            W_k = W_k.reshape([W_k.shape[0], 1])
 631
 632            # scaling factor of combination technique
 633            W_k = W_k * comb_coef[tuple(l)]
 634
 635            # find corresponding code values
 636            f_k = np.array([samples[np.where((x == xi_d).all(axis=1))[0][0]] for x in X_k])
 637
 638            # quadrature of Q^1_{k1} X ... X Q^1_{kN} product
 639            Q = Q + np.sum(f_k * W_k, axis=0).T
 640
 641        return Q
 642
 643    def get_moments(self, qoi):
 644        """
 645        Parameters
 646        ----------
 647        qoi : str
 648            name of the qoi
 649
 650        Returns
 651        -------
 652        mean and variance of qoi (float (N_qoi,))
 653        """
 654        logging.debug('Computing moments...')
 655        # compute mean
 656        mean_f = self.quadrature(qoi)
 657        # compute variance
 658        variance_samples = [(sample - mean_f)**2 for sample in self.samples[qoi]]
 659        var_f = self.quadrature(qoi, samples=variance_samples)
 660        logging.debug('done')
 661        return mean_f, var_f
 662
 663    def sc_expansion(self, samples, x):
 664        """
 665        Non-recursive implementation of the SC expansion. Performs interpolation
 666        of code output samples for both full and sparse grids.
 667
 668        Parameters
 669        ----------
 670        samples : list
 671            list of code output samples.
 672        x : array
 673            One or more locations in stochastic space at which to evaluate
 674            the surrogate.
 675
 676        Returns
 677        -------
 678        surr : array
 679            The interpolated values of the code output at input locations
 680            specified by x.
 681
 682        """
 683        # Computing the tensor grid of each multiindex l (xi_d below)
 684        # every time is slow. Instead store it globally, and only recompute when
 685        # self.l_norm has changed, when the flag init_interpolation = True.
 686        # This flag is set to True when self.analyse is executed
 687        if self.init_interpolation:
 688            self.xi_d_per_l = {}
 689            for l in self.l_norm:
 690                # all points corresponding to l
 691                xi = [self.xi_1d[n][l[n]] for n in range(self.N)]
 692                self.xi_d_per_l[tuple(l)] = np.array(list(product(*xi)))
 693            self.init_interpolation = False
 694
 695        surr = 0.0
 696        for l in self.l_norm:
 697            # all points corresponding to l
 698            # xi = [self.xi_1d[n][l[n]] for n in range(self.N)]
 699            # xi_d = np.array(list(product(*xi)))
 700            xi_d = self.xi_d_per_l[tuple(l)]
 701
 702            for xi in xi_d:
 703                # indices of current collocation point
 704                # in corresponding 1d colloc points (self.xi_1d[n][l[n]])
 705                # These are the j of the 1D lagrange polynomials l_j(x), see
 706                # lagrange_poly subroutine
 707                idx = [(self.xi_1d[n][l[n]] == xi[n]).nonzero()[0][0] for n in range(self.N)]
 708                # index of the code sample
 709                sample_idx = np.where((xi == self.xi_d).all(axis=1))[0][0]
 710
 711                # values of Lagrange polynomials at x
 712                if x.ndim == 1:
 713                    weight = [lagrange_poly(x[n], self.xi_1d[n][l[n]], idx[n])
 714                              for n in range(self.N)]
 715                    surr += self.comb_coef[tuple(l)] * samples[sample_idx] * np.prod(weight, axis=0)
 716                # batch setting, if multiple x values are prescribed
 717                else:
 718                    weight = [lagrange_poly(x[:, n], self.xi_1d[n][l[n]], idx[n])
 719                              for n in range(self.N)]
 720                    surr += self.comb_coef[tuple(l)] * samples[sample_idx] * \
 721                        np.prod(weight, axis=0).reshape([-1, 1])
 722
 723        return surr
 724
 725    def get_sample_array(self, qoi):
 726        """
 727        Parameters
 728        ----------
 729        qoi : str
 730            name of quantity of interest
 731
 732        Returns
 733        -------
 734        array of all samples of qoi
 735        """
 736        return np.array([self.samples[qoi][k] for k in range(len(self.samples[qoi]))])
 737
 738    def adaptation_histogram(self):
 739        """Plots a bar chart of the maximum order of the quadrature rule
 740        that is used in each dimension. Use in case of the dimension adaptive
 741        sampler to get an idea of which parameters were more refined than others.
 742        This gives only a first-order idea, as it only plots the max quad
 743        order independently per input parameter, so higher-order refinements
 744        that were made do not show up in the bar chart.
 745        """
 746        import matplotlib.pyplot as plt
 747
 748        fig = plt.figure('adapt_hist', figsize=[4, 8])
 749        ax = fig.add_subplot(111, ylabel='max quadrature order',
 750                             title='Number of refinements = %d'
 751                             % self.sampler.nadaptations)
 752        # find max quad order for every parameter
 753        adapt_measure = np.max(self.l_norm, axis=0)
 754        ax.bar(range(adapt_measure.size), height=adapt_measure - 1)
 755        params = list(self.sampler.vary.get_keys())
 756        ax.set_xticks(range(adapt_measure.size))
 757        ax.set_xticklabels(params)
 758        plt.xticks(rotation=90)
 759        plt.tight_layout()
 760        plt.show()
 761
 762    def adaptation_table(self, **kwargs):
 763        """Plots a color-coded table of the quadrature-order refinement.
 764        Shows in what order the parameters were refined, and unlike
 765        adaptation_histogram, this also shows higher-order refinements.
 766
 767        Parameters
 768        ----------
 769        **kwargs: can contain kwarg 'order' to specify the order in which
 770        the variables on the x axis are plotted (e.g. in order of decreasing
 771        1st order Sobol index).
 772
 773        Returns
 774        -------
 775        None.
 776
 777        """
 778
 779        # if specified, plot the variables on the x axis in a given order
 780        if 'order' in kwargs:
 781            order = kwargs['order']
 782        else:
 783            order = range(self.N)
 784
 785        l = np.copy(self.l_norm)[:, order]
 786
 787        import matplotlib as mpl
 788        import matplotlib.pyplot as plt
 789
 790        fig = plt.figure(figsize=[12, 6])
 791        ax = fig.add_subplot(111)
 792
 793        # max quad order
 794        M = np.max(l)
 795        cmap = plt.get_cmap('Purples', M)
 796        # plot 'heat map' of refinement
 797        im = plt.imshow(l.T, cmap=cmap, aspect='auto')
 798        norm = mpl.colors.Normalize(vmin=0, vmax=M - 1)
 799        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
 800        sm.set_array([])
 801        cb = plt.colorbar(im)
 802        # plot the quad order in the middle of the colorbar intervals
 803        p = np.linspace(1, M, M+1)
 804        tick_p = 0.5 * (p[1:] + p[0:-1])
 805        cb.set_ticks(tick_p)
 806        cb.set_ticklabels(np.arange(M))
 807        cb.set_label(r'quadrature order')
 808        # plot the variables names on the x axis
 809        ax.set_yticks(range(l.shape[1]))
 810        params = np.array(list(self.sampler.vary.get_keys()))
 811        ax.set_yticklabels(params[order], fontsize=12)
 812        # ax.set_yticks(range(l.shape[0]))
 813        ax.set_xlabel('iteration')
 814        # plt.yticks(rotation=90)
 815        plt.tight_layout()
 816        plt.show()
 817
 818    def plot_grid(self):
 819        """Plots the collocation points for 2 and 3 dimensional problems
 820        """
 821        import matplotlib.pyplot as plt
 822
 823        if self.N == 2:
 824            fig = plt.figure()
 825            ax = fig.add_subplot(111, xlabel=r'$x_1$', ylabel=r'$x_2$')
 826            ax.plot(self.xi_d[:, 0], self.xi_d[:, 1], 'ro')
 827            plt.tight_layout()
 828            plt.show()
 829        elif self.N == 3:
 830            from mpl_toolkits.mplot3d import Axes3D
 831            fig = plt.figure()
 832            ax = fig.add_subplot(111, projection='3d', xlabel=r'$x_1$',
 833                                 ylabel=r'$x_2$', zlabel=r'$x_3$')
 834            ax.scatter(self.xi_d[:, 0], self.xi_d[:, 1], self.xi_d[:, 2])
 835            plt.tight_layout()
 836            plt.show()
 837        else:
 838            logging.debug('Will only plot for N = 2 or N = 3.')
 839
 840    def SC2PCE(self, samples, qoi, verbose=True, **kwargs):
 841        """Computes the Polynomials Chaos Expansion coefficients from the SC
 842        expansion via a transformation of basis (Lagrange polynomials basis -->
 843        orthonomial basis).
 844
 845        Parameters
 846        ----------
 847        samples : array
 848            SC code samples from which to compute the PCE coefficients
 849
 850        qoi : string
 851            Name of the QoI.
 852
 853        Returns
 854        -------
 855        pce_coefs : dict
 856            PCE coefficients per multi index l
 857        """
 858        pce_coefs = {}
 859
 860        if 'l_norm' in kwargs:
 861            l_norm = kwargs['l_norm']
 862        else:
 863            l_norm = self.l_norm
 864
 865        if 'xi_d' in kwargs:
 866            xi_d = kwargs['xi_d']
 867        else:
 868            xi_d = self.xi_d
 869
 870        # if not hasattr(self, 'pce_coefs'):
 871        #     self.pce_coefs = {}
 872
 873        count_l = 1
 874        for l in l_norm:
 875            if not tuple(l) in self.pce_coefs[qoi].keys():
 876                # pce coefficients for current multi-index l
 877                pce_coefs[tuple(l)] = {}
 878
 879                # 1d points generated by l
 880                x_1d = [self.xi_1d[n][l[n]] for n in range(self.N)]
 881                # 1d Lagrange polynomials generated by l
 882                # EDIT: do not use chaospy for Lagrange, converts lagrange into monomial, requires
 883                # Vandermonde matrix inversion to find coefficients, which becomes
 884                # very ill conditioned rather quickly. Can no longer use cp.E to compute
 885                # integrals, use GQ instead
 886                # a_1d = [cp.lagrange_polynomial(sampler.xi_1d[n][l[n]]) for n in range(d)]
 887
 888                # N-dimensional grid generated by l
 889                x_l = np.array(list(product(*x_1d)))
 890
 891                # all multi indices of the PCE expansion: k <= l
 892                k_norm = list(product(*[np.arange(1, l[n] + 1) for n in range(self.N)]))
 893
 894                if verbose:
 895                    logging.debug('Computing PCE coefficients %d / %d' % (count_l, l_norm.shape[0]))
 896
 897                for k in k_norm:
 898                    # product of the PCE basis function of order k - 1 and all
 899                    # Lagrange basis functions in a_1d, per dimension
 900                    # [[phi_k[0]*a_1d[0]], ..., [phi_k[N-1]*a_1d[N-1]]]
 901
 902                    # orthogonal polynomial generated by chaospy
 903                    phi_k = [cp.expansion.stieltjes(k[n] - 1,
 904                                                    dist=self.sampler.params_distribution[n],
 905                                                    normed=True)[-1] for n in range(self.N)]
 906
 907                    # the polynomial order of each integrand phi_k*a_j = (k - 1) + (number of
 908                    # colloc. points - 1)
 909                    orders = [(k[n] - 1) + (self.xi_1d[n][l[n]].size - 1) for n in range(self.N)]
 910
 911                    # will hold the products of PCE basis functions phi_k and lagrange
 912                    # interpolation polynomials a_1d
 913                    cross_prod = []
 914
 915                    for n in range(self.N):
 916                        # GQ using n points can exactly integrate polynomials of order 2n-1:
 917                        # solve for required number of points n when given order
 918                        n_quad_points = int(np.ceil((orders[n] + 1) / 2))
 919
 920                        # generate Gaussian quad rule
 921                        if isinstance(self.sampler.params_distribution[n], cp.DiscreteUniform):
 922                            xi = self.xi_1d[n][l[n]]
 923                            wi = self.wi_1d[n][l[n]]
 924                        else:
 925                            xi, wi = cp.generate_quadrature(
 926                                n_quad_points - 1, self.sampler.params_distribution[n], rule="G")
 927                            xi = xi[0]
 928
 929                        # number of colloc points = number of Lagrange polynomials
 930                        n_lagrange_poly = int(self.xi_1d[n][l[n]].size)
 931
 932                        # compute the v coefficients = coefficients of SC2PCE mapping
 933                        v_coefs_n = []
 934                        for j in range(n_lagrange_poly):
 935                            # compute values of Lagrange polys at quadrature points
 936                            l_j = np.array([lagrange_poly(xi[i], self.xi_1d[n][l[n]], j)
 937                                            for i in range(xi.size)])
 938                            # each coef is the integral of the lagrange poly times the current
 939                            # orthogonal PCE poly
 940                            v_coefs_n.append(np.sum(l_j * phi_k[n](xi) * wi))
 941                        cross_prod.append(v_coefs_n)
 942
 943                    # tensor product of all integrals
 944                    integrals = np.array(list(product(*cross_prod)))
 945                    # multiply over the number of parameters: v_prod = v_k1_j1 * ... * v_kd_jd
 946                    v_prod = np.prod(integrals, axis=1)
 947                    v_prod = v_prod.reshape([v_prod.size, 1])
 948
 949                    # find corresponding code values
 950                    f_k = np.array([samples[np.where((x == xi_d).all(axis=1))[0][0]] for x in x_l])
 951
 952                    # the sum of all code samples * v_{k,j_1} * ... * v_{k,j_N}
 953                    # equals the PCE coefficient
 954                    eta_k = np.sum(f_k * v_prod, axis=0).T
 955
 956                    pce_coefs[tuple(l)][tuple(k)] = eta_k
 957            else:
 958                # pce coefs previously computed, just copy result
 959                pce_coefs[tuple(l)] = self.pce_coefs[qoi][tuple(l)]
 960            count_l += 1
 961
 962        logging.debug('done')
 963        return pce_coefs
 964
 965    def generalized_pce_coefs(self, l_norm, pce_coefs, comb_coef):
 966        """
 967        Computes the generalized PCE coefficients, defined as the linear combination
 968        of PCE coefficients which make it possible to write the dimension-adaptive
 969        PCE expansion in standard form. See DOI: 10.13140/RG.2.2.18085.58083/1
 970
 971        Parameters
 972        ----------
 973        l_norm : array
 974            array of quadrature order multi indices
 975        pce_coefs : dict
 976            dict of PCE coefficients computed by the SC2PCE subroutine
 977        comb_coef : dict
 978            dict of combination coefficients computed by compute_comb_coef
 979
 980        Returns
 981        -------
 982        gen_pce_coefs : dict
 983            The generalized PCE coefficients, indexed per multi index.
 984
 985        """
 986        assert self.sparse, "Generalized PCE coefficients are computed only for sparse grids"
 987
 988        # the set of all forward neighbours of l: {k | k >= l}
 989        F_l = {}
 990        # the generalized PCE coefs, which turn the adaptive PCE into a standard PCE expansion
 991        gen_pce_coefs = {}
 992        for l in l_norm:
 993            # {indices of k | k >= l}
 994            idx = np.where((l <= l_norm).all(axis=1))[0]
 995            F_l[tuple(l)] = l_norm[idx]
 996
 997            # the generalized PCE coefs are comb_coef[k] * pce_coefs[k][l], summed over k
 998            # for a fixed l
 999            gen_pce_coefs[tuple(l)] = 0.0
1000            for k in F_l[tuple(l)]:
1001                gen_pce_coefs[tuple(l)] += comb_coef[tuple(k)] * pce_coefs[tuple(k)][tuple(l)]
1002
1003        return gen_pce_coefs
1004
1005    def get_pce_stats(self, l_norm, pce_coefs, comb_coef):
1006        """Compute the mean and the variance based on the generalized PCE coefficients
1007        See DOI: 10.13140/RG.2.2.18085.58083/1
1008
1009        Parameters
1010        ----------
1011        l_norm : array
1012            array of quadrature order multi indices
1013        pce_coefs : dict
1014            dict of PCE coefficients computed by the SC2PCE subroutine
1015        comb_coef : dict
1016            dict of combination coefficients computed by compute_comb_coef
1017
1018        Returns
1019        -------
1020        mean, variance and generalized pce coefficients
1021        """
1022
1023        gen_pce_coefs = self.generalized_pce_coefs(l_norm, pce_coefs, comb_coef)
1024
1025        # with the generalized pce coefs, the standard PCE formulas for the mean and var
1026        # can be used for the dimension-adaptive PCE
1027
1028        # the PCE mean is just the 1st generalized PCE coef
1029        l1 = tuple(np.ones(self.N, dtype=int))
1030        mean = gen_pce_coefs[l1]
1031
1032        # the variance is the sum of the squared generalized PCE coefs, excluding the 1st coef
1033        D = 0.0
1034        for l in l_norm[1:]:
1035            D += gen_pce_coefs[tuple(l)] ** 2
1036
1037        if type(D) is float:
1038            D = np.array([D])
1039
1040        return mean, D, gen_pce_coefs
1041
1042    def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
1043        """Computes Sobol indices using Polynomials Chaos coefficients. These
1044        coefficients are computed from the SC expansion via a transformation
1045        of basis (SC2PCE subroutine). This works better than computing the
1046        Sobol indices directly from the SC expansion in the case of the
1047        dimension-adaptive sampler. See DOI: 10.13140/RG.2.2.18085.58083/1
1048
1049        Method: J.D. Jakeman et al, "Adaptive multi-index collocation
1050        for uncertainty quantification and sensitivity analysis", 2019.
1051        (Page 18)
1052
1053        Parameters
1054        ----------
1055        qoi : str
1056            name of the Quantity of Interest for which to compute the indices
1057        typ : str
1058            Default = 'first_order'. 'all' is also possible
1059        **kwargs : dict
1060            if this contains 'samples', use these instead of the SC samples
1061            in the database
1062
1063        Returns
1064        -------
1065        Tuple
1066            Mean: PCE mean
1067            Var: PCE variance
1068            S_u: PCE Sobol indices, either the first order indices or all indices
1069        """
1070
1071        if 'samples' in kwargs:
1072            samples = kwargs['samples']
1073            N_qoi = samples[0].size
1074        else:
1075            samples = self.samples[qoi]
1076            N_qoi = self.N_qoi[qoi]
1077
1078        # compute the (generalized) PCE coefficients and stats
1079        self.pce_coefs[qoi] = self.SC2PCE(samples, qoi)
1080        mean, D, gen_pce_coefs = self.get_pce_stats(
1081            self.l_norm, self.pce_coefs[qoi], self.comb_coef)
1082
1083        logging.debug('Computing Sobol indices...')
1084        # Universe = (0, 1, ..., N - 1)
1085        U = np.arange(self.N)
1086
1087        # the powerset of U for either the first order or all Sobol indices
1088        if typ == 'first_order':
1089            P = [()]
1090            for i in range(self.N):
1091                P.append((i,))
1092        else:
1093            # all indices u
1094            P = list(powerset(U))
1095
1096        # dict to hold the partial Sobol variances and Sobol indices
1097        D_u = {}
1098        S_u = {}
1099        for u in P[1:]:
1100            # complement of u
1101            u_prime = np.delete(U, u)
1102            k = []
1103            D_u[u] = np.zeros(N_qoi)
1104            S_u[u] = np.zeros(N_qoi)
1105
1106            # compute the set of multi indices corresponding to varying ONLY
1107            # the inputs indexed by u
1108            for l in self.l_norm:
1109                # assume l_i = 1 for all i in u' until found otherwise
1110                all_ones = True
1111                for i_up in u_prime:
1112                    if l[i_up] != 1:
1113                        all_ones = False
1114                        break
1115                # if l_i = 1 for all i in u'
1116                if all_ones:
1117                    # assume all l_i for i in u are > 1
1118                    all_gt_one = True
1119                    for i_u in u:
1120                        if l[i_u] == 1:
1121                            all_gt_one = False
1122                            break
1123                    # if both conditions above are True, the current l varies
1124                    # only inputs indexed by u, add this l to k
1125                    if all_gt_one:
1126                        k.append(l)
1127
1128            logging.debug('Multi indices of dimension  %s are %s' % (u, k))
1129            # the partial variance of u is the sum of all variances indexed by k
1130            for k_u in k:
1131                D_u[u] = D_u[u] + gen_pce_coefs[tuple(k_u)] ** 2
1132
1133            # normalize D_u by total variance D to get the Sobol index
1134            if 0 in D:
1135                with warnings.catch_warnings():
1136                    # ignore divide by zero warning
1137                    warnings.simplefilter("ignore")
1138                    S_u[u] = D_u[u] / D    
1139            else:
1140                S_u[u] = D_u[u] / D
1141
1142        logging.debug('done')
1143        return mean, D, D_u, S_u
1144
1145    # Start SC specific methods
1146
1147    @staticmethod
1148    def compute_tensor_prod_u(xi, wi, u, u_prime):
1149        """
1150        Calculate tensor products of weights and collocation points
1151        with dimension of u and u'
1152
1153        Parameters
1154        ----------
1155        xi (array of floats): 1D collocation points
1156        wi (array of floats): 1D quadrature weights
1157        u  (array of int): dimensions
1158        u_prime (array of int): remaining dimensions (u union u' = range(N))
1159
1160        Returns
1161        -------
1162        tensor products of points and weights for dimensions u and u'
1163
1164        """
1165
1166        # tensor products with dimension of u
1167        xi_u = [xi[key] for key in u]
1168        wi_u = [wi[key] for key in u]
1169
1170        xi_d_u = np.array(list(product(*xi_u)))
1171        wi_d_u = np.array(list(product(*wi_u)))
1172
1173        # tensor products with dimension of u' (complement of u)
1174        xi_u_prime = [xi[key] for key in u_prime]
1175        wi_u_prime = [wi[key] for key in u_prime]
1176
1177        xi_d_u_prime = np.array(list(product(*xi_u_prime)))
1178        wi_d_u_prime = np.array(list(product(*wi_u_prime)))
1179
1180        return xi_d_u, wi_d_u, xi_d_u_prime, wi_d_u_prime
1181
1182    def compute_marginal(self, qoi, u, u_prime, diff):
1183        """
1184        Computes a marginal integral of the qoi(x) over the dimension defined
1185        by u_prime, for every x value in dimensions u
1186
1187        Parameters
1188        ----------
1189        - qoi (str): name of the quantity of interest
1190        - u (array of int): dimensions which are not integrated
1191        - u_prime (array of int): dimensions which are integrated
1192        - diff (array of int): levels
1193
1194        Returns
1195        -------
1196        - h: values of the marginal integral; wi_d_u: weights of dimensions u
1197
1198        """
1199        # 1d weights and points of the levels in diff
1200        xi = [self.xi_1d[n][np.abs(diff)[n]] for n in range(self.N)]
1201        wi = [self.wi_1d[n][np.abs(diff)[n]] for n in range(self.N)]
1202
1203        # compute tensor products and weights in dimension u and u'
1204        xi_d_u, wi_d_u, xi_d_u_prime, wi_d_u_prime =\
1205            self.compute_tensor_prod_u(xi, wi, u, u_prime)
1206
1207        idxs = np.argsort(np.concatenate((u, u_prime)))
1208        # marginals h = f*w' integrated over u', so cardinality is that of u
1209        h = [0.0] * xi_d_u.shape[0]
1210        for i_u, xi_d_u_ in enumerate(xi_d_u):
1211            for i_up, xi_d_u_prime_ in enumerate(xi_d_u_prime):
1212                xi_s = np.concatenate((xi_d_u_, xi_d_u_prime_))[idxs]
1213                # find the index of the corresponding code sample
1214                idx = np.where(np.prod(self.xi_d == xi_s, axis=1))[0][0]
1215                # perform quadrature
1216                q_k = self.samples[qoi][idx]
1217                h[i_u] += q_k * wi_d_u_prime[i_up].prod()
1218        # return marginal and the weights of dimensions u
1219        return h, wi_d_u
1220
1221    def get_sobol_indices(self, qoi, typ='first_order'):
1222        """
1223        Computes Sobol indices using Stochastic Collocation. Method:
1224        Tang (2009), "Global sensitivity analysis for stochastic collocation
1225        expansion".
1226
1227        Parameters
1228        ----------
1229        qoi (str): name of the Quantity of Interest for which to compute the indices
1230        typ (str): Default = 'first_order'. 'all' is also possible
1231
1232        Returns
1233        -------
1234        Either the first order or all Sobol indices of qoi
1235        """
1236        logging.debug('Computing Sobol indices...')
1237        # multi indices
1238        U = np.arange(self.N)
1239
1240        if typ == 'first_order':
1241            P = list(powerset(U))[0:self.N + 1]
1242        elif typ == 'all':
1243            # all indices u
1244            P = list(powerset(U))
1245        else:
1246            logging.debug('Specified Sobol Index type %s not recognized' % typ)
1247            logging.debug('Accepted are first_order or all')
1248            import sys
1249            sys.exit()
1250
1251        # get first two moments
1252        mu, D = self.get_moments(qoi)
1253
1254        # partial variances
1255        D_u = {P[0]: mu**2}
1256
1257        sobol = {}
1258
1259        for u in P[1:]:
1260
1261            # complement of u
1262            u_prime = np.delete(U, u)
1263            D_u[u] = 0.0
1264
1265            for l in self.l_norm:
1266
1267                # expand the multi-index indices of the tensor product
1268                # (Q^1_{i1} - Q^1_{i1-1}) X ... X (Q^1_{id} - Q^1_{id-1})
1269                diff_idx = np.array(list(product(*[[k, -(k - 1)] for k in l])))
1270
1271                # perform analysis on each Q^1_l1 X ... X Q^1_l_N tensor prod
1272                for diff in diff_idx:
1273
1274                    # if any Q^1_li is below the minimum level, Q^1_li is defined
1275                    # as zero: do not compute this Q^1_l1 X ... X Q^1_l_N tensor prod
1276                    if not (np.abs(diff) < self.l_norm_min).any():
1277
1278                        # marginal integral h, integrate over dimensions u'
1279                        h, wi_d_u = self.compute_marginal(qoi, u, u_prime, diff)
1280
1281                        # square result and integrate over remaining dimensions u
1282                        for i_u in range(wi_d_u.shape[0]):
1283                            D_u[u] += np.sign(np.prod(diff)) * h[i_u]**2 * wi_d_u[i_u].prod()
1284
1285                # D_u[u] = D_u[u].flatten()
1286
1287            # all subsets of u
1288            W = list(powerset(u))[0:-1]
1289
1290            # partial variance of u
1291            for w in W:
1292                D_u[u] -= D_u[w]
1293
1294            # compute Sobol index, only include points where D > 0
1295            if 0 in D:
1296                with warnings.catch_warnings():
1297                    # ignore divide by zero warning
1298                    warnings.simplefilter("ignore")
1299                    sobol[u] = D_u[u] / D
1300            else:
1301                sobol[u] = D_u[u] / D
1302
1303        logging.debug('done.')
1304        return sobol
1305
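    # Note (illustrative, not part of the easyvvuq source): the returned dict
    # is keyed by tuples of input indices, e.g. sobol[(0,)] holds the first
    # order index of the first input and sobol[(0, 1)] a second order
    # interaction index.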
1306    def get_uncertainty_amplification(self, qoi):
1307        """
1308        Computes a measure that signifies the ratio of output to input
1309        uncertainty. It is computed as the (mean) Coefficient of Variation (V)
1310        of the output divided by the (mean) CV of the input.
1311
1312        Parameters
1313        ----------
1314        qoi (string): name of the Quantity of Interest
1315
1316        Returns
1317        -------
1318        blowup (float): the ratio output CV / input CV
1319
1320        """
1321
1322        mean_f, var_f = self.get_moments(qoi)
1323        std_f = np.sqrt(var_f)
1324
1325        mean_xi = []
1326        std_xi = []
1327        CV_xi = []
1328        for param in self.sampler.params_distribution:
1329            E = cp.E(param)
1330            Std = cp.Std(param)
1331            mean_xi.append(E)
1332            std_xi.append(Std)
1333            CV_xi.append(Std / E)
1334
1335        CV_in = np.mean(CV_xi)
1336        CV_out = std_f / mean_f
1337        idx = np.where(~np.isnan(CV_out))[0]
1338        CV_out = np.mean(CV_out[idx])
1339        blowup = CV_out / CV_in
1340
1341        print('-----------------')
1342        print('Mean CV input = %.4f %%' % (100 * CV_in, ))
1343        print('Mean CV output = %.4f %%' % (100 * CV_out, ))
1344        print('Uncertainty amplification factor = %.4f/%.4f = %.4f' %
1345              (CV_out, CV_in, blowup))
1346        print('-----------------')
1347
1348        return blowup
1349
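# Illustrative note (not part of the easyvvuq source): if the mean input CV is
# 0.10 and the mean output CV is 0.25, the amplification factor is
# 0.25 / 0.10 = 2.5, i.e. the relative output uncertainty is 2.5 times the
# relative input uncertainty.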
1350
1351def powerset(iterable):
1352    """powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
1353
1354    Taken from: https://docs.python.org/3/library/itertools.html#recipes
1355
1356    Parameters
1357    ----------
1358    iterable : iterable
1359        Input sequence
1360
1361    Returns
1362    -------
1363    itertools.chain : an iterator over all subsets of the input, each as a tuple
1364    """
1365
1366    s = list(iterable)
1367    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
1368
1369
1370def lagrange_poly(x, x_i, j):
1371    """Lagrange polynomials used for interpolation
1372
1373    l_j(x) = product((x - x_m) / (x_j - x_m)) with 0 <= m <= k
1374                                                   and m != j
1375
1376    Parameters
1377    ----------
1378    x : float
1379        location at which to compute the polynomial
1380
1381    x_i : list or array of float
1382        nodes of the Lagrange polynomials
1383
1384    j : int
1385        index of node at which l_j(x_j) = 1
1386
1387    Returns
1388    -------
1389    float
1390        l_j(x) calculated as shown above.
1391    """
1392    l_j = 1.0
1393
1394    for m in range(len(x_i)):
1395
1396        if m != j:
1397            denom = x_i[j] - x_i[m]
1398            numer = x - x_i[m]
1399
1400            l_j *= numer / denom
1401
1402    return l_j
1403    # implementation below is more beautiful, but slower
1404    # x_i_ = np.delete(x_i, j)
1405    # return np.prod((x - x_i_) / (x_i[j] - x_i_))
1406
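# Illustrative sketch (not part of the easyvvuq source): a Lagrange polynomial
# equals 1 at its own node and 0 at the other nodes, e.g.
#   lagrange_poly(0.5, [0.0, 0.5, 1.0], 1)  # -> 1.0
#   lagrange_poly(0.0, [0.0, 0.5, 1.0], 1)  # -> 0.0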
1407
1408def setdiff2d(X, Y):
1409    """
1410    Computes the difference of two 2D arrays X and Y
1411
1412    Parameters
1413    ----------
1414    X : 2D numpy array
1415    Y : 2D numpy array
1416
1417    Returns
1418    -------
1419    The difference X \\ Y as a 2D array
1420
1421    """
1422    diff = set(map(tuple, X)) - set(map(tuple, Y))
1423    return np.array(list(diff))
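# Illustrative sketch (not part of the easyvvuq source): setdiff2d returns the
# rows of X that do not occur in Y; because Python sets are used internally,
# the row order of the result is not guaranteed.
#   X = np.array([[1, 1], [1, 2], [2, 1]])
#   Y = np.array([[1, 2]])
#   setdiff2d(X, Y)  # -> rows [1, 1] and [2, 1], in some order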
class SCAnalysisResults(easyvvuq.analysis.results.AnalysisResults):
42class SCAnalysisResults(AnalysisResults):
43    def _get_sobols_first(self, qoi, input_):
44        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first'])
45        result = raw_dict[AnalysisResults._to_tuple(qoi)][input_]
46        return np.array(result)
47        # try:
48        #     # return np.array([float(result)])
49        #     return np.array([result[0]])
50        # except TypeError:
51        #     return np.array(result)
52
53    def supported_stats(self):
54        """Types of statistics supported by the describe method.
55
56        Returns
57        -------
58        list of str
59        """
60        return ['mean', 'var', 'std']
61
62    def _describe(self, qoi, statistic):
63        if statistic in self.supported_stats():
64            return self.raw_data['statistical_moments'][qoi][statistic]
65        else:
66            raise NotImplementedError
67
68    def surrogate(self):
69        """Return an SC surrogate model.
70
71        Returns
72        -------
73        A function that takes a dictionary of parameter - value pairs and returns
74        a dictionary with the results (same output as decoder).
75        """
76        def surrogate_fn(inputs):
77            def swap(x):
78                if len(x) > 1:
79                    return list(x)
80                else:
81                    return x[0]
82            values = np.squeeze(np.array([inputs[key] for key in self.inputs])).T
83            results = dict([(qoi, swap(self.surrogate_(qoi, values))) for qoi in self.qois])
84            return results
85        return surrogate_fn

Contains the analysis results.

Parameters
  • raw_data (obj): An arbitrary object that contains raw analysis data.
  • samples (pandas DataFrame): Collated samples.
  • qois (list of str): List of qoi names used during the analysis.
  • inputs (list of str): List of input names used during the analysis.
def supported_stats(self):

Types of statistics supported by the describe method.

Returns
  • list of str
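One of these statistics can then be requested through the describe method of the results object; a minimal usage sketch (the qoi name 'f' is an assumption, not taken from the source):

    results.describe('f', 'mean')   # statistical mean of qoi 'f'
    results.describe('f', 'std')    # standard deviation of qoi 'f'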
def surrogate(self):

Return an SC surrogate model.

Returns
  • A function that takes a dictionary of parameter-value pairs and returns a dictionary with the results (same output as decoder).
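A minimal usage sketch (the input names 'x1', 'x2' and the qoi name 'f' are assumptions, not taken from the source):

    surrogate = results.surrogate()
    prediction = surrogate({'x1': 0.1, 'x2': 0.9})
    print(prediction['f'])   # interpolated value(s) of qoi 'f' at this point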
class SCAnalysis(easyvvuq.analysis.base.BaseAnalysisElement):
  88class SCAnalysis(BaseAnalysisElement):
  89
  90    def __init__(self, sampler=None, qoi_cols=None):
  91        """
  92        Parameters
  93        ----------
  94        sampler : SCSampler
  95            Sampler used to initiate the SC analysis
  96        qoi_cols : list or None
  97            Column names for quantities of interest (for which analysis is
  98            performed).
  99        """
 100
 101        if sampler is None:
 102            msg = 'SC analysis requires a paired sampler to be passed'
 103            raise RuntimeError(msg)
 104
 105        if qoi_cols is None:
 106            raise RuntimeError("Analysis element requires a list of "
 107                               "quantities of interest (qoi)")
 108
 109        self.qoi_cols = qoi_cols
 110        self.output_type = OutputType.SUMMARY
 111        self.sampler = sampler
 112        self.dimension_adaptive = sampler.dimension_adaptive
 113        if self.dimension_adaptive:
 114            self.adaptation_errors = []
 115            self.mean_history = []
 116            self.std_history = []
 117        self.sparse = sampler.sparse
 118        self.pce_coefs = {}
 119        self.N_qoi = {}
 120        for qoi_k in qoi_cols:
 121            self.pce_coefs[qoi_k] = {}
 122            self.N_qoi[qoi_k] = 0
 123
 124    def element_name(self):
 125        """Name for this element for logging purposes"""
 126        return "SC_Analysis"
 127
 128    def element_version(self):
 129        """Version of this element for logging purposes"""
 130        return "0.5"
 131
 132    def save_state(self, filename):
 133        """Saves the complete state of the analysis object to a pickle file,
 134        except the sampler object (self.sampler).
 135
 136        Parameters
 137        ----------
 138        filename : string
 139            name to the file to write the state to
 140        """
 141        logging.debug("Saving analysis state to %s" % filename)
 142        # make a copy of the state, and do not store the sampler as well
 143        state = copy.copy(self.__dict__)
 144        del state['sampler']
 145        file = open(filename, 'wb')
 146        pickle.dump(state, file)
 147        file.close()
 148
 149    def load_state(self, filename):
 150        """Loads the complete state of the analysis object from a
 151        pickle file, stored using save_state.
 152
 153        Parameters
 154        ----------
 155        filename : string
 156            name of the file to load
 157        """
 158        logging.debug("Loading analysis state from %s" % filename)
 159        file = open(filename, 'rb')
 160        state = pickle.load(file)
 161        for key in state.keys():
 162            self.__dict__[key] = state[key]
 163        file.close()
 164
 165    def analyse(self, data_frame=None, compute_moments=True, compute_Sobols=True):
 166        """Perform SC analysis on input `data_frame`.
 167
 168        Parameters
 169        ----------
 170        data_frame : pandas.DataFrame
 171            Input data for analysis.
 172
 173        Returns
 174        -------
 175        dict
 176            Results dictionary with sub-dicts with keys:
 177            ['statistical_moments', 'sobol_indices'].
 178            Each dict has an entry for each item in `qoi_cols`.
 179        """
 180
 181        if data_frame is None:
 182            raise RuntimeError("Analysis element needs a data frame to "
 183                               "analyse")
 184        elif isinstance(data_frame, pd.DataFrame) and data_frame.empty:
 185            raise RuntimeError(
 186                "No data in data frame passed to analyse element")
 187
 188        # the number of uncertain parameters
 189        self.N = self.sampler.N
 190        # tensor grid
 191        self.xi_d = self.sampler.xi_d
 192        # the maximum level (quad order) of the (sparse) grid
 193        self.L = self.sampler.L
 194
 195        # if L < L_min: quadratures and interpolations are zero
 196        # For full tensor grid: there is only one level: L_min = L
 197        if not self.sparse:
 198            self.L_min = self.L
 199            self.l_norm = np.array([self.sampler.polynomial_order])
 200            self.l_norm_min = self.l_norm
 201        # For sparse grid: one or more levels
 202        else:
 203            self.L_min = 1
 204            # multi indices (stored in l_norm) for isotropic sparse grid or
 205            # dimension-adaptive grid before the 1st refinement.
 206            # If dimension_adaptive and nadaptations > 0: l_norm
 207            # is updated in self.adapt_dimension
 208            if not self.dimension_adaptive or self.sampler.nadaptations == 0:
 209                # the maximum level (quad order) of the (sparse) grid
 210                self.l_norm = self.sampler.compute_sparse_multi_idx(self.L, self.N)
 211
 212            self.l_norm_min = np.ones(self.N, dtype=int)
 213
 214        # compute generalized combination coefficients
 215        self.comb_coef = self.compute_comb_coef()
 216
 217        # 1d weights and points per level
 218        self.xi_1d = self.sampler.xi_1d
 219        # self.wi_1d = self.compute_SC_weights(rule=self.sampler.quad_rule)
 220        self.wi_1d = self.sampler.wi_1d
 221
 222        # Extract output values for each quantity of interest from Dataframe
 223        logging.debug('Loading samples...')
 224        qoi_cols = self.qoi_cols
 225        samples = {k: [] for k in qoi_cols}
 226        for run_id in data_frame[('run_id', 0)].unique():
 227            for k in qoi_cols:
 228                values = data_frame.loc[data_frame[('run_id', 0)] == run_id][k].values
 229                samples[k].append(values.flatten())
 230        self.samples = samples
 231        logging.debug('done')
 232
 233        # assume that self.l_norm has changed, and that the interpolation
 234        # must be initialised, see sc_expansion subroutine
 235        self.init_interpolation = True
 236
 237        # same pce coefs must be computed for every qoi
 238        if self.sparse:
 239            for qoi_k in qoi_cols:
 240                self.pce_coefs[qoi_k] = self.SC2PCE(samples[qoi_k], qoi_k)
 241                # size of one code sample
 242                self.N_qoi[qoi_k] = self.samples[qoi_k][0].size
 243
 244        # Compute descriptive statistics for each quantity of interest
 245        results = {'statistical_moments': {},
 246                   'sobols_first': {k: {} for k in self.qoi_cols},
 247                   'sobols': {k: {} for k in self.qoi_cols}}
 248
 249        if compute_moments:
 250            for qoi_k in qoi_cols:
 251                if not self.sparse:
 252                    mean_k, var_k = self.get_moments(qoi_k)
 253                    std_k = np.sqrt(var_k)
 254                else:
 255                    self.pce_coefs[qoi_k] = self.SC2PCE(self.samples[qoi_k], qoi_k)
 256                    mean_k, var_k, _ = self.get_pce_stats(self.l_norm, self.pce_coefs[qoi_k],
 257                                                          self.comb_coef)
 258                    std_k = np.sqrt(var_k)
 259
 260                # compute statistical moments
 261                results['statistical_moments'][qoi_k] = {'mean': mean_k,
 262                                                         'var': var_k,
 263                                                         'std': std_k}
 264
 265        if compute_Sobols:
 266            for qoi_k in qoi_cols:
 267                if not self.sparse:
 268                    results['sobols'][qoi_k] = self.get_sobol_indices(qoi_k, 'first_order')
 269                else:
 270                    _, _, _, results['sobols'][qoi_k] = self.get_pce_sobol_indices(
 271                        qoi_k, 'first_order')
 272
 273                for idx, param_name in enumerate(self.sampler.vary.get_keys()):
 274                    results['sobols_first'][qoi_k][param_name] = \
 275                        results['sobols'][qoi_k][(idx,)]
 276
 277        results = SCAnalysisResults(raw_data=results, samples=data_frame,
 278                                    qois=qoi_cols, inputs=list(self.sampler.vary.get_keys()))
 279        results.surrogate_ = self.surrogate
 280        return results
 281
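    # Typical workflow sketch (illustrative; campaign and sampler setup are not
    # shown and the qoi name 'f' is an assumption):
    #   analysis = SCAnalysis(sampler=sampler, qoi_cols=['f'])
    #   campaign.apply_analysis(analysis)
    #   results = campaign.get_last_analysis()
    #   results.describe('f', 'mean')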
 282    def compute_comb_coef(self, **kwargs):
 283        """Compute general combination coefficients. These are the coefficients
 284        multiplying the tensor products associated to each multi index l,
 285        see page 12 Gerstner & Griebel, numerical integration using sparse grids
 286        """
 287
 288        if 'l_norm' in kwargs:
 289            l_norm = kwargs['l_norm']
 290        else:
 291            l_norm = self.l_norm
 292
 293        comb_coef = {}
 294        logging.debug('Computing combination coefficients...')
 295        for k in l_norm:
 296            coef = 0.0
 297            # for every k, subtract all multi indices
 298            for l in l_norm:
 299                z = l - k
 300                # if the results contains only 0's and 1's, then z is the
 301                # vector that can be formed from a tensor product of unit vectors
 302                # for which k+z is in self.l_norm
 303                if np.array_equal(z, z.astype(bool)):
 304                    coef += (-1)**(np.sum(z))
 305            comb_coef[tuple(k)] = coef
 306        logging.debug('done')
 307        return comb_coef
 308
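    # Illustrative note (not part of the easyvvuq source): for the sparse index
    # set l_norm = {(1, 1), (1, 2), (2, 1)} the rule above gives
    # comb_coef = {(1, 1): -1, (1, 2): 1, (2, 1): 1}, the classic
    # combination-technique coefficients for that grid.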
 309    def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
 310                        method='surplus', **kwargs):
 311        """Compute the adaptation metric and decide which of the admissible
 312        level indices to include in next iteration of the sparse grid. The
 313        adaptation metric is based on the hierarchical surplus, defined as the
 314        difference between the new code values of the admissible level indices,
 315        and the SC surrogate of the previous iteration. Alternatively, it can be
 316        based on the difference between the output mean of the current level,
 317        and the mean computed with one extra admissible index.
 318
 319        This subroutine must be called AFTER the code is evaluated at
 320        the new points, but BEFORE the analysis is performed.
 321
 322        Parameters
 323        ----------
 324        qoi : string
 325            the name of the quantity of interest which is used
 326            to base the adaptation metric on.
 327        data_frame : pandas.DataFrame
 328        store_stats_history : bool
 329            store the mean and variance at each refinement in self.mean_history
 330            and self.std_history. Used for checking convergence in the statistics
 331            over the refinement iterations
 332        method : string
 333            name of the refinement error, default is 'surplus'. In this case the
 334            error is based on the hierarchical surplus, which is an interpolation
 335            based error. Another possibility is 'var',
 336            in which case the error is based on the difference in the
 337            variance between the current estimate and the estimate obtained
 338            when a particular candidate direction is added.
 339        """
 340        logging.debug('Refining sampling plan...')
 341        # load the code samples
 342        samples = []
 343        if isinstance(data_frame, pd.DataFrame):
 344            for run_id in data_frame[('run_id', 0)].unique():
 345                values = data_frame.loc[data_frame[('run_id', 0)] == run_id][qoi].values
 346                samples.append(values.flatten())
 347
 348        if method == 'var':
 349            all_idx = np.concatenate((self.l_norm, self.sampler.admissible_idx))
 350            self.xi_1d = self.sampler.xi_1d
 351            self.wi_1d = self.sampler.wi_1d
 352            self.pce_coefs[qoi] = self.SC2PCE(samples, qoi, verbose=True, l_norm=all_idx,
 353                                              xi_d=self.sampler.xi_d)
 354            _, var_l, _ = self.get_pce_stats(self.l_norm, self.pce_coefs[qoi], self.comb_coef)
 355
 356        # the currently accepted grid points
 357        xi_d_accepted = self.sampler.generate_grid(self.l_norm)
 358
 359        # compute the hierarchical surplus based error for every admissible l
 360        error = {}
 361        for l in self.sampler.admissible_idx:
 362            error[tuple(l)] = []
 363            # compute the error based on the hierarchical surplus (interpolation based)
 364            if method == 'surplus':
 365                # collocation points of current level index l
 366                X_l = [self.sampler.xi_1d[n][l[n]] for n in range(self.N)]
 367                X_l = np.array(list(product(*X_l)))
 368                # only consider new points, subtract the accepted points
 369                X_l = setdiff2d(X_l, xi_d_accepted)
 370                for xi in X_l:
 371                    # find the location of the current xi in the global grid
 372                    idx = np.where((xi == self.sampler.xi_d).all(axis=1))[0][0]
 373                    # hierarchical surplus error at xi
 374                    hier_surplus = samples[idx] - self.surrogate(qoi, xi)
 375                    if 'index' in kwargs:
 376                        hier_surplus = hier_surplus[kwargs['index']]
 377                        error[tuple(l)].append(np.abs(hier_surplus))
 378                    else:
 379                        error[tuple(l)].append(np.linalg.norm(hier_surplus, np.inf))
 380                # compute mean error over all points in X_l
 381                error[tuple(l)] = np.mean(error[tuple(l)])
 382            # compute the error based on quadrature of the variance
 383            elif method == 'var':
 384                # create a candidate set of multi indices by adding the current
 385                # admissible index to l_norm
 386                candidate_l_norm = np.concatenate((self.l_norm, l.reshape([1, self.N])))
 387                # now we must recompute the combination coefficients
 388                c_l = self.compute_comb_coef(l_norm=candidate_l_norm)
 389                _, var_candidate_l, _ = self.get_pce_stats(
 390                    candidate_l_norm, self.pce_coefs[qoi], c_l)
 391                # error in var
 392                error[tuple(l)] = np.linalg.norm(var_candidate_l - var_l, np.inf)
 393            else:
 394                logging.debug('Specified refinement method %s not recognized' % method)
 395                logging.debug('Accepted are surplus or var')
 396                import sys
 397                sys.exit()
 398
 399        for key in error.keys():
 400            # logging.debug("Surplus error when l = %s is %s" % (key, error[key]))
 401            logging.debug("Refinement error for l = %s is %s" % (key, error[key]))
 402        # find the admissible index with the largest error
 403        l_star = np.array(max(error, key=error.get)).reshape([1, self.N])
 404        # logging.debug('Selecting %s for refinement.' % l_star)
 405        logging.debug('Selecting %s for refinement.' % l_star)
 406        # add max error to list
 407        self.adaptation_errors.append(max(error.values()))
 408
 409        # add l_star to the current accepted level indices
 410        self.l_norm = np.concatenate((self.l_norm, l_star))
 411        # if someone executes this function twice for some reason,
 412        # remove the duplicate l_star entry. Keep order unaltered
 413        idx = np.unique(self.l_norm, axis=0, return_index=True)[1]
 414        self.l_norm = np.array([self.l_norm[i] for i in sorted(idx)])
 415
 416        # perform the analyse step, but do not compute moments and Sobols
 417        self.analyse(data_frame, compute_moments=False, compute_Sobols=False)
 418
 419        # if True, store the mean and variance at each iteration of the adaptive
 420        # algorithm
 421        if store_stats_history:
 422            # mean_f, var_f = self.get_moments(qoi)
 423            logging.debug('Storing moments of iteration %d' % self.sampler.nadaptations)
 424            pce_coefs = self.SC2PCE(samples, qoi, verbose=True)
 425            mean_f, var_f, _ = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
 426            self.mean_history.append(mean_f)
 427            self.std_history.append(var_f)
 428            logging.debug('done')
 429
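    # Adaptive-refinement loop sketch (illustrative; the qoi name 'f' is an
    # assumption and running the new samples through the campaign is not shown):
    #   sampler.look_ahead(analysis.l_norm)        # propose admissible indices
    #   ...                                        # evaluate the code at the new points
    #   data_frame = campaign.get_collation_result()
    #   analysis.adapt_dimension('f', data_frame)
    #   campaign.apply_analysis(analysis)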
 430    def merge_accepted_and_admissible(self, level=0, **kwargs):
 431        """In the case of the dimension-adaptive sampler, there are 2 sets of
 432        quadrature multi indices. There are the accepted indices that are actually
 433        used in the analysis, and the admissible indices, of which some might
 434        move to the accepted set in subsequent iterations. This subroutine merges
 435        the two sets of multi indices by moving all admissible to the set of
 436        accepted indices.
 437        Do this at the end, when no more refinements will be executed. The
 438        samples related to the admissible indices are already computed, although
 439        not used in the analysis. By executing this subroutine at the very end, all
 440        computed samples are used during the final postprocessing stage. Execute
 441        campaign.apply_analysis to let the new set of indices take effect.
 442        If further refinements are executed after all via sampler.look_ahead, the
 443        number of new admissible samples to be computed can be very high,
 444        especially in high dimensions. It is possible to undo the merge via
 445        analysis.undo_merge before new refinements are made. Execute
 446        campaign.apply_analysis again to let the old set of indices take effect.
 447        """
 448
 449        if 'include' in kwargs:
 450            include = kwargs['include']
 451        else:
 452            include = np.arange(self.N)
 453
 454        if self.sampler.dimension_adaptive:
 455            logging.debug('Moving admissible indices to the accepted set...')
 456            # make a backup of l_norm, such that undo_merge can revert back
 457            self.l_norm_backup = np.copy(self.l_norm)
 458            # merge admissible and accepted multi indices
 459            if level == 0:
 460                merged_l = np.concatenate((self.l_norm, self.sampler.admissible_idx))
 461            else:
 462                admissible_idx = []
 463                count = 0
 464                for l in self.sampler.admissible_idx:
 465                    L = np.sum(l) - self.N + 1
 466                    tmp = np.where(l == L)[0]
 467                    if L <= level and np.in1d(tmp, include)[0]:
 468                        admissible_idx.append(l)
 469                        count += 1
 470                admissible_idx = np.array(admissible_idx).reshape([count, self.N])
 471                merged_l = np.concatenate((self.l_norm, admissible_idx))
 472            # make sure final result contains only unique indices and store
 473            # results in l_norm
 474            idx = np.unique(merged_l, axis=0, return_index=True)[1]
 475            # return np.array([merged_l[i] for i in sorted(idx)])
 476            self.l_norm = np.array([merged_l[i] for i in sorted(idx)])
 477            logging.debug('done')
 478
 479    def undo_merge(self):
 480        """This reverses the effect of the merge_accepted_and_admissible subroutine.
 481        Execute if further refinements are required after all.
 482        """
 483        if self.sampler.dimension_adaptive:
 484            self.l_norm = self.l_norm_backup
 485            logging.debug('Restored old multi indices.')
 486
 487    def get_adaptation_errors(self):
 488        """Returns self.adaptation_errors
 489        """
 490        return self.adaptation_errors
 491
 492    def plot_stat_convergence(self):
 493        """Plots the convergence of the statistical mean and std dev over the different
 494        refinements in a dimension-adaptive setting. Specifically the inf norm
 495        of the difference between the stats of iteration i and iteration i-1
 496        is plotted.
 497        """
 498        if not self.dimension_adaptive:
 499            logging.debug('Only works for the dimension adaptive sampler.')
 500            return
 501
 502        K = len(self.mean_history)
 503        if K < 2:
 504            logging.debug('Means from at least two refinements are required')
 505            return
 506        else:
 507            differ_mean = np.zeros(K - 1)
 508            differ_std = np.zeros(K - 1)
 509            for i in range(1, K):
 510                differ_mean[i - 1] = np.linalg.norm(self.mean_history[i] -
 511                                                    self.mean_history[i - 1], np.inf)
 512                # make relative
 513                differ_mean[i - 1] = differ_mean[i - 1] / np.linalg.norm(self.mean_history[i - 1],
 514                                                                         np.inf)
 515
 516                differ_std[i - 1] = np.linalg.norm(self.std_history[i] -
 517                                                   self.std_history[i - 1], np.inf)
 518                # make relative
 519                differ_std[i - 1] = differ_std[i - 1] / np.linalg.norm(self.std_history[i - 1],
 520                                                                       np.inf)
 521
 522        import matplotlib.pyplot as plt
 523        fig = plt.figure('stat_conv')
 524        ax1 = fig.add_subplot(111, title='moment convergence')
 525        ax1.set_xlabel('iteration', fontsize=12)
 526        # ax1.set_ylabel(r'$ ||\mathrm{mean}_i - \mathrm{mean}_{i - 1}||_\infty$',
 527        # color='r', fontsize=12)
 528        ax1.set_ylabel(r'relative error mean', color='r', fontsize=12)
 529        ax1.plot(range(2, K + 1), differ_mean, color='r', marker='+')
 530        ax1.tick_params(axis='y', labelcolor='r')
 531
 532        ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
 533
 534        # ax2.set_ylabel(r'$ ||\mathrm{var}_i - \mathrm{var}_{i - 1}||_\infty$',
 535        # color='b', fontsize=12)
 536        ax2.set_ylabel(r'relative error variance', fontsize=12, color='b')
 537        ax2.plot(range(2, K + 1), differ_std, color='b', marker='*')
 538        ax2.tick_params(axis='y', labelcolor='b')
 539
 540        plt.tight_layout()
 541        plt.show()
 542
 543    def surrogate(self, qoi, x, L=None):
 544        """Use sc_expansion UQP as a surrogate
 545
 546        Parameters
 547        ----------
 548        qoi : str
 549            name of the qoi
 550        x : array
 551            location at which to evaluate the surrogate
 552        L : int
 553            level of the (sparse) grid, default = self.L
 554
 555        Returns
 556        -------
 557        the interpolated value of qoi at x (float, (N_qoi,))
 558        """
 559
 560        return self.sc_expansion(self.samples[qoi], x=x)
 561
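    # Usage sketch (illustrative; the qoi name 'f' and a three-dimensional
    # input space are assumptions):
    #   analysis.surrogate('f', np.array([0.2, 0.7, 0.1]))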
 562    def quadrature(self, qoi, samples=None):
 563        """Computes a (Smolyak) quadrature
 564
 565        Parameters
 566        ----------
 567        qoi : str
 568            name of the qoi
 569
 570        samples: array
 571            compute the mean by setting samples = self.samples.
 572            To compute the variance, set samples = (self.samples - mean)**2
 573
 574        Returns
 575        -------
 576        the quadrature of qoi
 577        """
 578        if samples is None:
 579            samples = self.samples[qoi]
 580
 581        return self.combination_technique(qoi, samples)
 582
 583    def combination_technique(self, qoi, samples=None, **kwargs):
 584        """Efficient quadrature formulation for (sparse) grids. See:
 585
 586            Gerstner, Griebel, "Numerical integration using sparse grids"
 587            Uses the general combination technique (page 12).
 588
 589        Parameters
 590        ----------
 591        qoi : str
 592            name of the qoi
 593
 594        samples : array
 595            compute the mean by setting samples = self.samples.
 596            To compute the variance, set samples = (self.samples - mean)**2
 597        """
 598
 599        if samples is None:
 600            samples = self.samples[qoi]
 601
 602        # In the case of quadrature-based refinement, we need to specify
 603        # l_norm, comb_coef and xi_d other than the current default values
 604        if 'l_norm' in kwargs:
 605            l_norm = kwargs['l_norm']
 606        else:
 607            l_norm = self.l_norm
 608
 609        if 'comb_coef' in kwargs:
 610            comb_coef = kwargs['comb_coef']
 611        else:
 612            comb_coef = self.comb_coef
 613
 614        if 'xi_d' in kwargs:
 615            xi_d = kwargs['xi_d']
 616        else:
 617            xi_d = self.xi_d
 618
 619        # quadrature Q
 620        Q = 0.0
 621
 622        # loop over l
 623        for l in l_norm:
 624            # compute the tensor product of parameter and weight values
 625            X_k = [self.xi_1d[n][l[n]] for n in range(self.N)]
 626            W_k = [self.wi_1d[n][l[n]] for n in range(self.N)]
 627
 628            X_k = np.array(list(product(*X_k)))
 629            W_k = np.array(list(product(*W_k)))
 630            W_k = np.prod(W_k, axis=1)
 631            W_k = W_k.reshape([W_k.shape[0], 1])
 632
 633            # scaling factor of combination technique
 634            W_k = W_k * comb_coef[tuple(l)]
 635
 636            # find corresponding code values
 637            f_k = np.array([samples[np.where((x == xi_d).all(axis=1))[0][0]] for x in X_k])
 638
 639            # quadrature of Q^1_{k1} X ... X Q^1_{kN} product
 640            Q = Q + np.sum(f_k * W_k, axis=0).T
 641
 642        return Q
 643
 644    def get_moments(self, qoi):
 645        """
 646        Parameters
 647        ----------
 648        qoi : str
 649            name of the qoi
 650
 651        Returns
 652        -------
 653        mean and variance of qoi (float (N_qoi,))
 654        """
 655        logging.debug('Computing moments...')
 656        # compute mean
 657        mean_f = self.quadrature(qoi)
 658        # compute variance
 659        variance_samples = [(sample - mean_f)**2 for sample in self.samples[qoi]]
 660        var_f = self.quadrature(qoi, samples=variance_samples)
 661        logging.debug('done')
 662        return mean_f, var_f
 663
 664    def sc_expansion(self, samples, x):
 665        """
 666        Non recursive implementation of the SC expansion. Performs interpolation
 667        of code output samples for both full and sparse grids.
 668
 669        Parameters
 670        ----------
 671        samples : list
 672            list of code output samples.
 673        x : array
 674            One or more locations in stochastic space at which to evaluate
 675            the surrogate.
 676
 677        Returns
 678        -------
 679        surr : array
 680            The interpolated values of the code output at input locations
 681            specified by x.
 682
 683        """
 684        # Computing the tensor grid of each multiindex l (xi_d below)
 685        # every time is slow. Instead store it globally, and only recompute when
 686        # self.l_norm has changed, when the flag init_interpolation = True.
 687        # This flag is set to True when self.analyse is executed
 688        if self.init_interpolation:
 689            self.xi_d_per_l = {}
 690            for l in self.l_norm:
 691                # all points corresponding to l
 692                xi = [self.xi_1d[n][l[n]] for n in range(self.N)]
 693                self.xi_d_per_l[tuple(l)] = np.array(list(product(*xi)))
 694            self.init_interpolation = False
 695
 696        surr = 0.0
 697        for l in self.l_norm:
 698            # all points corresponding to l
 699            # xi = [self.xi_1d[n][l[n]] for n in range(self.N)]
 700            # xi_d = np.array(list(product(*xi)))
 701            xi_d = self.xi_d_per_l[tuple(l)]
 702
 703            for xi in xi_d:
 704                # indices of current collocation point
 705                # in corresponding 1d colloc points (self.xi_1d[n][l[n]])
 706                # These are the j of the 1D lagrange polynomials l_j(x), see
 707                # lagrange_poly subroutine
 708                idx = [(self.xi_1d[n][l[n]] == xi[n]).nonzero()[0][0] for n in range(self.N)]
 709                # index of the code sample
 710                sample_idx = np.where((xi == self.xi_d).all(axis=1))[0][0]
 711
 712                # values of Lagrange polynomials at x
 713                if x.ndim == 1:
 714                    weight = [lagrange_poly(x[n], self.xi_1d[n][l[n]], idx[n])
 715                              for n in range(self.N)]
 716                    surr += self.comb_coef[tuple(l)] * samples[sample_idx] * np.prod(weight, axis=0)
 717                # batch setting, if multiple x values are prescribed
 718                else:
 719                    weight = [lagrange_poly(x[:, n], self.xi_1d[n][l[n]], idx[n])
 720                              for n in range(self.N)]
 721                    surr += self.comb_coef[tuple(l)] * samples[sample_idx] * \
 722                        np.prod(weight, axis=0).reshape([-1, 1])
 723
 724        return surr
 725
 726    def get_sample_array(self, qoi):
 727        """
 728        Parameters
 729        ----------
 730        qoi : str
 731            name of quantity of interest
 732
 733        Returns
 734        -------
 735        array of all samples of qoi
 736        """
 737        return np.array([self.samples[qoi][k] for k in range(len(self.samples[qoi]))])
 738
 739    def adaptation_histogram(self):
 740        """Plots a bar chart of the maximum order of the quadrature rule
 741        that is used in each dimension. Use in case of the dimension adaptive
 742        sampler to get an idea of which parameters were more refined than others.
 743        This gives only a first-order idea, as it only plots the max quad
 744        order independently per input parameter, so higher-order refinements
 745        that were made do not show up in the bar chart.
 746        """
 747        import matplotlib.pyplot as plt
 748
 749        fig = plt.figure('adapt_hist', figsize=[4, 8])
 750        ax = fig.add_subplot(111, ylabel='max quadrature order',
 751                             title='Number of refinements = %d'
 752                             % self.sampler.nadaptations)
 753        # find max quad order for every parameter
 754        adapt_measure = np.max(self.l_norm, axis=0)
 755        ax.bar(range(adapt_measure.size), height=adapt_measure - 1)
 756        params = list(self.sampler.vary.get_keys())
 757        ax.set_xticks(range(adapt_measure.size))
 758        ax.set_xticklabels(params)
 759        plt.xticks(rotation=90)
 760        plt.tight_layout()
 761        plt.show()
 762
 763    def adaptation_table(self, **kwargs):
 764        """Plots a color-coded table of the quadrature-order refinement.
 765        Shows in what order the parameters were refined, and unlike
 766        adaptation_histogram, this also shows higher-order refinements.
 767
 768        Parameters
 769        ----------
 770        **kwargs: can contain kwarg 'order' to specify the order in which
 771        the variables on the x axis are plotted (e.g. in order of decreasing
 772        1st order Sobol index).
 773
 774        Returns
 775        -------
 776        None.
 777
 778        """
 779
 780        # if specified, plot the variables on the x axis in a given order
 781        if 'order' in kwargs:
 782            order = kwargs['order']
 783        else:
 784            order = range(self.N)
 785
 786        l = np.copy(self.l_norm)[:, order]
 787
 788        import matplotlib as mpl
 789        import matplotlib.pyplot as plt
 790
 791        fig = plt.figure(figsize=[12, 6])
 792        ax = fig.add_subplot(111)
 793
 794        # max quad order
 795        M = np.max(l)
 796        cmap = plt.get_cmap('Purples', M)
 797        # plot 'heat map' of refinement
 798        im = plt.imshow(l.T, cmap=cmap, aspect='auto')
 799        norm = mpl.colors.Normalize(vmin=0, vmax=M - 1)
 800        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
 801        sm.set_array([])
 802        cb = plt.colorbar(im)
 803        # plot the quad order in the middle of the colorbar intervals
 804        p = np.linspace(1, M, M+1)
 805        tick_p = 0.5 * (p[1:] + p[0:-1])
 806        cb.set_ticks(tick_p)
 807        cb.set_ticklabels(np.arange(M))
 808        cb.set_label(r'quadrature order')
 809        # plot the variables names on the x axis
 810        ax.set_yticks(range(l.shape[1]))
 811        params = np.array(list(self.sampler.vary.get_keys()))
 812        ax.set_yticklabels(params[order], fontsize=12)
 813        # ax.set_yticks(range(l.shape[0]))
 814        ax.set_xlabel('iteration')
 815        # plt.yticks(rotation=90)
 816        plt.tight_layout()
 817        plt.show()
 818
 819    def plot_grid(self):
 820        """Plots the collocation points for 2 and 3 dimensional problems
 821        """
 822        import matplotlib.pyplot as plt
 823
 824        if self.N == 2:
 825            fig = plt.figure()
 826            ax = fig.add_subplot(111, xlabel=r'$x_1$', ylabel=r'$x_2$')
 827            ax.plot(self.xi_d[:, 0], self.xi_d[:, 1], 'ro')
 828            plt.tight_layout()
 829            plt.show()
 830        elif self.N == 3:
 831            from mpl_toolkits.mplot3d import Axes3D
 832            fig = plt.figure()
 833            ax = fig.add_subplot(111, projection='3d', xlabel=r'$x_1$',
 834                                 ylabel=r'$x_2$', zlabel=r'$x_3$')
 835            ax.scatter(self.xi_d[:, 0], self.xi_d[:, 1], self.xi_d[:, 2])
 836            plt.tight_layout()
 837            plt.show()
 838        else:
 839            logging.debug('Will only plot for N = 2 or N = 3.')
 840
 841    def SC2PCE(self, samples, qoi, verbose=True, **kwargs):
 842        """Computes the Polynomial Chaos Expansion coefficients from the SC
 843        expansion via a transformation of basis (Lagrange polynomial basis -->
 844        orthonormal basis).
 845
 846        Parameters
 847        ----------
 848        samples : array
 849            SC code samples from which to compute the PCE coefficients
 850
 851        qoi : string
 852            Name of the QoI.
 853
 854        Returns
 855        -------
 856        pce_coefs : dict
 857            PCE coefficients per multi index l
 858        """
 859        pce_coefs = {}
 860
 861        if 'l_norm' in kwargs:
 862            l_norm = kwargs['l_norm']
 863        else:
 864            l_norm = self.l_norm
 865
 866        if 'xi_d' in kwargs:
 867            xi_d = kwargs['xi_d']
 868        else:
 869            xi_d = self.xi_d
 870
 871        # if not hasattr(self, 'pce_coefs'):
 872        #     self.pce_coefs = {}
 873
 874        count_l = 1
 875        for l in l_norm:
 876            if not tuple(l) in self.pce_coefs[qoi].keys():
 877                # pce coefficients for current multi-index l
 878                pce_coefs[tuple(l)] = {}
 879
 880                # 1d points generated by l
 881                x_1d = [self.xi_1d[n][l[n]] for n in range(self.N)]
 882                # 1d Lagrange polynomials generated by l
 883                # EDIT: do not use chaospy for Lagrange, converts lagrange into monomial, requires
 884                # Vandermonde matrix inversion to find coefficients, which becomes
 885                # very ill conditioned rather quickly. Can no longer use cp.E to compute
 886                # integrals, use GQ instead
 887                # a_1d = [cp.lagrange_polynomial(sampler.xi_1d[n][l[n]]) for n in range(d)]
 888
 889                # N-dimensional grid generated by l
 890                x_l = np.array(list(product(*x_1d)))
 891
 892                # all multi indices of the PCE expansion: k <= l
 893                k_norm = list(product(*[np.arange(1, l[n] + 1) for n in range(self.N)]))
 894
 895                if verbose:
 896                    logging.debug('Computing PCE coefficients %d / %d' % (count_l, l_norm.shape[0]))
 897
 898                for k in k_norm:
 899                    # product of the PCE basis function of order k - 1 and all
 900                    # Lagrange basis functions in a_1d, per dimension
 901                    # [[phi_k[0]*a_1d[0]], ..., [phi_k[N-1]*a_1d[N-1]]]
 902
 903                    # orthogonal polynomial generated by chaospy
 904                    phi_k = [cp.expansion.stieltjes(k[n] - 1,
 905                                                    dist=self.sampler.params_distribution[n],
 906                                                    normed=True)[-1] for n in range(self.N)]
 907
 908                    # the polynomial order of each integrand phi_k*a_j = (k - 1) + (number of
 909                    # colloc. points - 1)
 910                    orders = [(k[n] - 1) + (self.xi_1d[n][l[n]].size - 1) for n in range(self.N)]
 911
 912                    # will hold the products of PCE basis functions phi_k and lagrange
 913                    # interpolation polynomials a_1d
 914                    cross_prod = []
 915
 916                    for n in range(self.N):
 917                        # GQ using n points can exactly integrate polynomials of order 2n-1:
 918                        # solve for required number of points n when given order
 919                        n_quad_points = int(np.ceil((orders[n] + 1) / 2))
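                        # e.g. an integrand of polynomial order 4 requires
                        # ceil((4 + 1) / 2) = 3 Gauss points (exact up to order 5)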
 920
 921                        # generate Gaussian quad rule
 922                        if isinstance(self.sampler.params_distribution[n], cp.DiscreteUniform):
 923                            xi = self.xi_1d[n][l[n]]
 924                            wi = self.wi_1d[n][l[n]]
 925                        else:
 926                            xi, wi = cp.generate_quadrature(
 927                                n_quad_points - 1, self.sampler.params_distribution[n], rule="G")
 928                            xi = xi[0]
 929
 930                        # number of colloc points = number of Lagrange polynomials
 931                        n_lagrange_poly = int(self.xi_1d[n][l[n]].size)
 932
 933                        # compute the v coefficients = coefficients of SC2PCE mapping
 934                        v_coefs_n = []
 935                        for j in range(n_lagrange_poly):
 936                            # compute values of Lagrange polys at quadrature points
 937                            l_j = np.array([lagrange_poly(xi[i], self.xi_1d[n][l[n]], j)
 938                                            for i in range(xi.size)])
 939                            # each coef is the integral of the lagrange poly times the current
 940                            # orthogonal PCE poly
 941                            v_coefs_n.append(np.sum(l_j * phi_k[n](xi) * wi))
 942                        cross_prod.append(v_coefs_n)
 943
 944                    # tensor product of all integrals
 945                    integrals = np.array(list(product(*cross_prod)))
 946                    # multiply over the number of parameters: v_prod = v_k1_j1 * ... * v_kd_jd
 947                    v_prod = np.prod(integrals, axis=1)
 948                    v_prod = v_prod.reshape([v_prod.size, 1])
 949
 950                    # find corresponding code values
 951                    f_k = np.array([samples[np.where((x == xi_d).all(axis=1))[0][0]] for x in x_l])
 952
 953                    # the sum of all code sample * v_{k,j_1} * ... * v_{k,j_N}
 954                    # equals the PCE coefficient
 955                    eta_k = np.sum(f_k * v_prod, axis=0).T
 956
 957                    pce_coefs[tuple(l)][tuple(k)] = eta_k
 958            else:
 959                # pce coefs previously computed, just copy result
 960                pce_coefs[tuple(l)] = self.pce_coefs[qoi][tuple(l)]
 961            count_l += 1
 962
 963        logging.debug('done')
 964        return pce_coefs
 965
 966    def generalized_pce_coefs(self, l_norm, pce_coefs, comb_coef):
 967        """
 968        Computes the generalized PCE coefficients, defined as the linear combination
 969        of PCE coefficients which make it possible to write the dimension-adaptive
 970        PCE expansion in standard form. See DOI: 10.13140/RG.2.2.18085.58083/1
 971
 972        Parameters
 973        ----------
 974        l_norm : array
 975            array of quadrature order multi indices
 976        pce_coefs : tuple
 977            tuple of PCE coefficients computed by SC2PCE subroutine
 978        comb_coef : tuple
 979            tuple of combination coefficients computed by compute_comb_coef
 980
 981        Returns
 982        -------
 983        gen_pce_coefs : tuple
 984            The generalized PCE coefficients, indexed per multi index.
 985
 986        """
 987        assert self.sparse, "Generalized PCE coefficients are computed only for sparse grids"
 988
 989        # the set of all forward neighbours of l: {k | k >= l}
 990        F_l = {}
 991        # the generalized PCE coefs, which turn the adaptive PCE into a standard PCE expansion
 992        gen_pce_coefs = {}
 993        for l in l_norm:
 994            # {indices of k | k >= l}
 995            idx = np.where((l <= l_norm).all(axis=1))[0]
 996            F_l[tuple(l)] = l_norm[idx]
 997
 998            # the generalized PCE coefs are comb_coef[k] * pce_coefs[k][l], summed over k
 999            # for a fixed l
1000            gen_pce_coefs[tuple(l)] = 0.0
1001            for k in F_l[tuple(l)]:
1002                gen_pce_coefs[tuple(l)] += comb_coef[tuple(k)] * pce_coefs[tuple(k)][tuple(l)]
1003
1004        return gen_pce_coefs
1005
1006    def get_pce_stats(self, l_norm, pce_coefs, comb_coef):
1007        """Compute the mean and the variance based on the generalized PCE coefficients
1008        See DOI: 10.13140/RG.2.2.18085.58083/1
1009
1010        Parameters
1011        ----------
1012        l_norm : array
1013            array of quadrature order multi indices
1014        pce_coefs : tuple
1015            tuple of PCE coefficients computed by SC2PCE subroutine
1016        comb_coef : tuple
1017            tuple of combination coefficients computed by compute_comb_coef
1018
1019        Returns
1020        -------
1021        mean, variance and generalized pce coefficients
1022        """
1023
1024        gen_pce_coefs = self.generalized_pce_coefs(l_norm, pce_coefs, comb_coef)
1025
1026        # with the generalized pce coefs, the standard PCE formulas for the mean and var
1027        # can be used for the dimension-adaptive PCE
1028
1029        # the PCE mean is just the 1st generalized PCE coef
1030        l1 = tuple(np.ones(self.N, dtype=int))
1031        mean = gen_pce_coefs[l1]
1032
1033        # the variance is the sum of the squared generalized PCE coefs, excluding the 1st coef
1034        D = 0.0
1035        for l in l_norm[1:]:
1036            D += gen_pce_coefs[tuple(l)] ** 2
1037
1038        if type(D) is float:
1039            D = np.array([D])
1040
1041        return mean, D, gen_pce_coefs
1042
1043    def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
1044        """Computes Sobol indices using Polynomial Chaos coefficients. These
1045        coefficients are computed from the SC expansion via a transformation
1046        of basis (SC2PCE subroutine). This works better than computing the
1047        Sobol indices directly from the SC expansion in the case of the
1048        dimension-adaptive sampler. See DOI: 10.13140/RG.2.2.18085.58083/1
1049
1050        Method: J.D. Jakeman et al, "Adaptive multi-index collocation
1051        for uncertainty quantification and sensitivity analysis", 2019.
1052        (Page 18)
1053
1054        Parameters
1055        ----------
1056        qoi : str
1057            name of the Quantity of Interest for which to compute the indices
1058        typ : str
1059            Default = 'first_order'. 'all' is also possible
1060        **kwargs : dict
1061            if this contains 'samples', use these instead of the SC samples
1062            in the database
1063
1064        Returns
1065        -------
1066        Tuple
1067            Mean: PCE mean
1068            Var: PCE variance
1069            S_u: PCE Sobol indices, either the first order indices or all indices
1070        """
1071
1072        if 'samples' in kwargs:
1073            samples = kwargs['samples']
1074            N_qoi = samples[0].size
1075        else:
1076            samples = self.samples[qoi]
1077            N_qoi = self.N_qoi[qoi]
1078
1079        # compute the (generalized) PCE coefficients and stats
1080        self.pce_coefs[qoi] = self.SC2PCE(samples, qoi)
1081        mean, D, gen_pce_coefs = self.get_pce_stats(
1082            self.l_norm, self.pce_coefs[qoi], self.comb_coef)
1083
1084        logging.debug('Computing Sobol indices...')
1085        # Universe = (0, 1, ..., N - 1)
1086        U = np.arange(self.N)
1087
1088        # the powerset of U for either the first order or all Sobol indices
1089        if typ == 'first_order':
1090            P = [()]
1091            for i in range(self.N):
1092                P.append((i,))
1093        else:
1094            # all indices u
1095            P = list(powerset(U))
1096
1097        # dict to hold the partial Sobol variances and Sobol indices
1098        D_u = {}
1099        S_u = {}
1100        for u in P[1:]:
1101            # complement of u
1102            u_prime = np.delete(U, u)
1103            k = []
1104            D_u[u] = np.zeros(N_qoi)
1105            S_u[u] = np.zeros(N_qoi)
1106
1107            # compute the set of multi indices corresponding to varying ONLY
1108            # the inputs indexed by u
1109            for l in self.l_norm:
1110                # assume l_i = 1 for all i in u' until found otherwise
1111                all_ones = True
1112                for i_up in u_prime:
1113                    if l[i_up] != 1:
1114                        all_ones = False
1115                        break
1116                # if l_i = 1 for all i in u'
1117                if all_ones:
1118                    # assume all l_i for i in u are > 1
1119                    all_gt_one = True
1120                    for i_u in u:
1121                        if l[i_u] == 1:
1122                            all_gt_one = False
1123                            break
1124                    # if both conditions above are True, the current l varies
1125                    # only inputs indexed by u, add this l to k
1126                    if all_gt_one:
1127                        k.append(l)
1128
1129            logging.debug('Multi indices of dimension %s are %s' % (u, k))
1130            # the partial variance of u is the sum of all variances index by k
1131            for k_u in k:
1132                D_u[u] = D_u[u] + gen_pce_coefs[tuple(k_u)] ** 2
1133
1134            # normalize D_u by total variance D to get the Sobol index
1135            if 0 in D:
1136                with warnings.catch_warnings():
1137                    # ignore divide by zero warning
1138                    warnings.simplefilter("ignore")
1139                    S_u[u] = D_u[u] / D
1140            else:
1141                S_u[u] = D_u[u] / D
1142
1143        logging.debug('done')
1144        return mean, D, D_u, S_u
1145
1146    # Start SC specific methods
1147
1148    @staticmethod
1149    def compute_tensor_prod_u(xi, wi, u, u_prime):
1150        """
1151        Calculate tensor products of weights and collocation points
1152        with dimension of u and u'
1153
1154        Parameters
1155        ----------
1156        xi (array of floats): 1D collocation points
1157        wi (array of floats): 1D quadrature weights
1158        u  (array of int): dimensions
1159        u_prime (array of int): remaining dimensions (u union u' = range(N))
1160
1161        Returns
1162        -------
1163        dict of tensor products of weight and points for dimensions u and u'
1164
1165        """
1166
1167        # tensor products with dimension of u
1168        xi_u = [xi[key] for key in u]
1169        wi_u = [wi[key] for key in u]
1170
1171        xi_d_u = np.array(list(product(*xi_u)))
1172        wi_d_u = np.array(list(product(*wi_u)))
1173
1174        # tensor products with dimension of u' (complement of u)
1175        xi_u_prime = [xi[key] for key in u_prime]
1176        wi_u_prime = [wi[key] for key in u_prime]
1177
1178        xi_d_u_prime = np.array(list(product(*xi_u_prime)))
1179        wi_d_u_prime = np.array(list(product(*wi_u_prime)))
1180
1181        return xi_d_u, wi_d_u, xi_d_u_prime, wi_d_u_prime
1182
1183    def compute_marginal(self, qoi, u, u_prime, diff):
1184        """
1185        Computes a marginal integral of the qoi(x) over the dimension defined
1186        by u_prime, for every x value in dimensions u
1187
1188        Parameters
1189        ----------
1190        - qoi (str): name of the quantity of interest
1191        - u (array of int): dimensions which are not integrated
1192        - u_prime (array of int): dimensions which are integrated
1193        - diff (array of int): levels
1194
1195        Returns
1196        -------
1197        - Values of the marginal integral
1198
1199        """
1200        # 1d weights and points of the levels in diff
1201        xi = [self.xi_1d[n][np.abs(diff)[n]] for n in range(self.N)]
1202        wi = [self.wi_1d[n][np.abs(diff)[n]] for n in range(self.N)]
1203
1204        # compute tensor products and weights in dimension u and u'
1205        xi_d_u, wi_d_u, xi_d_u_prime, wi_d_u_prime =\
1206            self.compute_tensor_prod_u(xi, wi, u, u_prime)
1207
1208        idxs = np.argsort(np.concatenate((u, u_prime)))
1209        # marginals h = f*w' integrated over u', so cardinality is that of u
1210        h = [0.0] * xi_d_u.shape[0]
1211        for i_u, xi_d_u_ in enumerate(xi_d_u):
1212            for i_up, xi_d_u_prime_ in enumerate(xi_d_u_prime):
1213                xi_s = np.concatenate((xi_d_u_, xi_d_u_prime_))[idxs]
1214                # find the index of the corresponding code sample
1215                idx = np.where(np.prod(self.xi_d == xi_s, axis=1))[0][0]
1216                # perform quadrature
1217                q_k = self.samples[qoi][idx]
1218                h[i_u] += q_k * wi_d_u_prime[i_up].prod()
1219        # return marginal and the weights of dimensions u
1220        return h, wi_d_u
1221
1222    def get_sobol_indices(self, qoi, typ='first_order'):
1223        """
1224        Computes Sobol indices using Stochastic Collocation. Method:
1225        Tang (2009), GLOBAL SENSITIVITY ANALYSIS  FOR STOCHASTIC COLLOCATION
1226        EXPANSION.
1227
1228        Parameters
1229        ----------
1230        qoi (str): name of the Quantity of Interest for which to compute the indices
1231        typ (str): Default = 'first_order'. 'all' is also possible
1232
1233        Returns
1234        -------
1235        Either the first order or all Sobol indices of qoi
1236        """
1237        logging.debug('Computing Sobol indices...')
1238        # multi indices
1239        U = np.arange(self.N)
1240
1241        if typ == 'first_order':
1242            P = list(powerset(U))[0:self.N + 1]
1243        elif typ == 'all':
1244            # all indices u
1245            P = list(powerset(U))
1246        else:
1247            logging.debug('Specified Sobol Index type %s not recognized' % typ)
1248            logging.debug('Accepted are first_order or all')
1249            import sys
1250            sys.exit()
1251
1252        # get first two moments
1253        mu, D = self.get_moments(qoi)
1254
1255        # partial variances
1256        D_u = {P[0]: mu**2}
1257
1258        sobol = {}
1259
1260        for u in P[1:]:
1261
1262            # complement of u
1263            u_prime = np.delete(U, u)
1264            D_u[u] = 0.0
1265
1266            for l in self.l_norm:
1267
1268                # expand the multi-index indices of the tensor product
1269                # (Q^1_{i1} - Q^1_{i1-1}) X ... X (Q^1_{id} - Q^1_{id-1})
1270                diff_idx = np.array(list(product(*[[k, -(k - 1)] for k in l])))
1271
1272                # perform analysis on each Q^1_l1 X ... X Q^1_l_N tensor prod
1273                for diff in diff_idx:
1274
1275                    # if any Q^1_li is below the minimum level, Q^1_li is defined
1276                    # as zero: do not compute this Q^1_l1 X ... X Q^1_l_N tensor prod
1277                    if not (np.abs(diff) < self.l_norm_min).any():
1278
1279                        # marginal integral h, integrate over dimensions u'
1280                        h, wi_d_u = self.compute_marginal(qoi, u, u_prime, diff)
1281
1282                        # square result and integrate over remaining dimensions u
1283                        for i_u in range(wi_d_u.shape[0]):
1284                            D_u[u] += np.sign(np.prod(diff)) * h[i_u]**2 * wi_d_u[i_u].prod()
1285
1286                # D_u[u] = D_u[u].flatten()
1287
1288            # all subsets of u
1289            W = list(powerset(u))[0:-1]
1290
1291            # partial variance of u
1292            for w in W:
1293                D_u[u] -= D_u[w]
1294
1295            # compute Sobol index, only include points where D > 0
1296            if 0 in D:
1297                with warnings.catch_warnings():
1298                    # ignore divide by zero warning
1299                    warnings.simplefilter("ignore")
1300                    sobol[u] = D_u[u] / D                  
1301            else:
1302                sobol[u] = D_u[u] / D
1303
1304        logging.debug('done.')
1305        return sobol
1306
1307    def get_uncertainty_amplification(self, qoi):
1308        """
1309        Computes a measure that signifies the ratio of output to input
1310        uncertainty. It is computed as the (mean) Coefficient of Variation (CV)
1311        of the output divided by the (mean) CV of the input.
1312
1313        Parameters
1314        ----------
1315        qoi (string): name of the Quantity of Interest
1316
1317        Returns
1318        -------
1319        blowup (float): the ratio output CV / input CV
1320
1321        """
1322
1323        mean_f, var_f = self.get_moments(qoi)
1324        std_f = np.sqrt(var_f)
1325
1326        mean_xi = []
1327        std_xi = []
1328        CV_xi = []
1329        for param in self.sampler.params_distribution:
1330            E = cp.E(param)
1331            Std = cp.Std(param)
1332            mean_xi.append(E)
1333            std_xi.append(Std)
1334            CV_xi.append(Std / E)
1335
1336        CV_in = np.mean(CV_xi)
1337        CV_out = std_f / mean_f
1338        idx = np.where(np.isnan(CV_out) == False)[0]
1339        CV_out = np.mean(CV_out[idx])
1340        blowup = CV_out / CV_in
1341
1342        print('-----------------')
1343        print('Mean CV input = %.4f %%' % (100 * CV_in, ))
1344        print('Mean CV output = %.4f %%' % (100 * CV_out, ))
1345        print('Uncertainty amplification factor = %.4f/%.4f = %.4f' %
1346              (CV_out, CV_in, blowup))
1347        print('-----------------')
1348
1349        return blowup

Base class for all EasyVVUQ analysis elements.

SCAnalysis(sampler=None, qoi_cols=None)
 90    def __init__(self, sampler=None, qoi_cols=None):
 91        """
 92        Parameters
 93        ----------
 94        sampler : SCSampler
 95            Sampler used to initiate the SC analysis
 96        qoi_cols : list or None
 97            Column names for quantities of interest (for which analysis is
 98            performed).
 99        """
100
101        if sampler is None:
102            msg = 'SC analysis requires a paired sampler to be passed'
103            raise RuntimeError(msg)
104
105        if qoi_cols is None:
106            raise RuntimeError("Analysis element requires a list of "
107                               "quantities of interest (qoi)")
108
109        self.qoi_cols = qoi_cols
110        self.output_type = OutputType.SUMMARY
111        self.sampler = sampler
112        self.dimension_adaptive = sampler.dimension_adaptive
113        if self.dimension_adaptive:
114            self.adaptation_errors = []
115            self.mean_history = []
116            self.std_history = []
117        self.sparse = sampler.sparse
118        self.pce_coefs = {}
119        self.N_qoi = {}
120        for qoi_k in qoi_cols:
121            self.pce_coefs[qoi_k] = {}
122            self.N_qoi[qoi_k] = 0
Parameters
  • sampler (SCSampler): Sampler used to initiate the SC analysis
  • qoi_cols (list or None): Column names for quantities of interest (for which analysis is performed).
Attributes
  • qoi_cols
  • output_type
  • sampler
  • dimension_adaptive
  • sparse
  • pce_coefs
  • N_qoi
def element_name(self):
124    def element_name(self):
125        """Name for this element for logging purposes"""
126        return "SC_Analysis"

Name for this element for logging purposes

def element_version(self):
128    def element_version(self):
129        """Version of this element for logging purposes"""
130        return "0.5"

Version of this element for logging purposes

def save_state(self, filename):
132    def save_state(self, filename):
133        """Saves the complete state of the analysis object to a pickle file,
134        except the sampler object (self.sampler).
135
136        Parameters
137        ----------
138        filename : string
139            name of the file to write the state to
140        """
141        logging.debug("Saving analysis state to %s" % filename)
142        # make a copy of the state, and do not store the sampler as well
143        state = copy.copy(self.__dict__)
144        del state['sampler']
145        file = open(filename, 'wb')
146        pickle.dump(state, file)
147        file.close()

Saves the complete state of the analysis object to a pickle file, except the sampler object (self.sampler).

Parameters
  • filename (string): name of the file to write the state to
def load_state(self, filename):
149    def load_state(self, filename):
150        """Loads the complete state of the analysis object from a
151        pickle file, stored using save_state.
152
153        Parameters
154        ----------
155        filename : string
156            name of the file to load
157        """
158        logging.debug("Loading analysis state from %s" % filename)
159        file = open(filename, 'rb')
160        state = pickle.load(file)
161        for key in state.keys():
162            self.__dict__[key] = state[key]
163        file.close()

Loads the complete state of the analysis object from a pickle file, stored using save_state.

Parameters
  • filename (string): name of the file to load
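
A minimal usage sketch of saving and restoring the analysis state between sessions (the file name is illustrative). Since the sampler is not pickled, it must be supplied again when the element is re-created:

    # after an analysis step, store everything except the sampler
    analysis.save_state('sc_analysis_state.pickle')

    # in a later session: re-create the element with the same sampler,
    # then restore the remaining state from the pickle file
    analysis = SCAnalysis(sampler=sampler, qoi_cols=['f'])
    analysis.load_state('sc_analysis_state.pickle')
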
def analyse(self, data_frame=None, compute_moments=True, compute_Sobols=True):
165    def analyse(self, data_frame=None, compute_moments=True, compute_Sobols=True):
166        """Perform SC analysis on input `data_frame`.
167
168        Parameters
169        ----------
170        data_frame : pandas.DataFrame
171            Input data for analysis.
172
173        Returns
174        -------
175        dict
176            Results dictionary with sub-dicts with keys:
177            ['statistical_moments', 'sobols_first', 'sobols'].
178            Each dict has an entry for each item in `qoi_cols`.
179        """
180
181        if data_frame is None:
182            raise RuntimeError("Analysis element needs a data frame to "
183                               "analyse")
184        elif isinstance(data_frame, pd.DataFrame) and data_frame.empty:
185            raise RuntimeError(
186                "No data in data frame passed to analyse element")
187
188        # the number of uncertain parameters
189        self.N = self.sampler.N
190        # tensor grid
191        self.xi_d = self.sampler.xi_d
192        # the maximum level (quad order) of the (sparse) grid
193        self.L = self.sampler.L
194
195        # if L < L_min: quadratures and interpolations are zero
196        # For full tensor grid: there is only one level: L_min = L
197        if not self.sparse:
198            self.L_min = self.L
199            self.l_norm = np.array([self.sampler.polynomial_order])
200            self.l_norm_min = self.l_norm
201        # For sparse grid: one or more levels
202        else:
203            self.L_min = 1
204            # multi indices (stored in l_norm) for isotropic sparse grid or
205            # dimension-adaptive grid before the 1st refinement.
206            # If dimension_adaptive and nadaptations > 0: l_norm
207            # is computed in self.adapt_dimension
208            if not self.dimension_adaptive or self.sampler.nadaptations == 0:
209                # the maximum level (quad order) of the (sparse) grid
210                self.l_norm = self.sampler.compute_sparse_multi_idx(self.L, self.N)
211
212            self.l_norm_min = np.ones(self.N, dtype=int)
213
214        # compute generalized combination coefficients
215        self.comb_coef = self.compute_comb_coef()
216
217        # 1d weights and points per level
218        self.xi_1d = self.sampler.xi_1d
219        # self.wi_1d = self.compute_SC_weights(rule=self.sampler.quad_rule)
220        self.wi_1d = self.sampler.wi_1d
221
222        # Extract output values for each quantity of interest from Dataframe
223        logging.debug('Loading samples...')
224        qoi_cols = self.qoi_cols
225        samples = {k: [] for k in qoi_cols}
226        for run_id in data_frame[('run_id', 0)].unique():
227            for k in qoi_cols:
228                values = data_frame.loc[data_frame[('run_id', 0)] == run_id][k].values
229                samples[k].append(values.flatten())
230        self.samples = samples
231        logging.debug('done')
232
233        # assume that self.l_norm has changed, and that the interpolation
234        # must be initialised, see sc_expansion subroutine
235        self.init_interpolation = True
236
237        # the pce coefs must be computed for every qoi
238        if self.sparse:
239            for qoi_k in qoi_cols:
240                self.pce_coefs[qoi_k] = self.SC2PCE(samples[qoi_k], qoi_k)
241                # size of one code sample
242                self.N_qoi[qoi_k] = self.samples[qoi_k][0].size
243
244        # Compute descriptive statistics for each quantity of interest
245        results = {'statistical_moments': {},
246                   'sobols_first': {k: {} for k in self.qoi_cols},
247                   'sobols': {k: {} for k in self.qoi_cols}}
248
249        if compute_moments:
250            for qoi_k in qoi_cols:
251                if not self.sparse:
252                    mean_k, var_k = self.get_moments(qoi_k)
253                    std_k = np.sqrt(var_k)
254                else:
255                    self.pce_coefs[qoi_k] = self.SC2PCE(self.samples[qoi_k], qoi_k)
256                    mean_k, var_k, _ = self.get_pce_stats(self.l_norm, self.pce_coefs[qoi_k],
257                                                          self.comb_coef)
258                    std_k = np.sqrt(var_k)
259
260                # compute statistical moments
261                results['statistical_moments'][qoi_k] = {'mean': mean_k,
262                                                         'var': var_k,
263                                                         'std': std_k}
264
265        if compute_Sobols:
266            for qoi_k in qoi_cols:
267                if not self.sparse:
268                    results['sobols'][qoi_k] = self.get_sobol_indices(qoi_k, 'first_order')
269                else:
270                    _, _, _, results['sobols'][qoi_k] = self.get_pce_sobol_indices(
271                        qoi_k, 'first_order')
272
273                for idx, param_name in enumerate(self.sampler.vary.get_keys()):
274                    results['sobols_first'][qoi_k][param_name] = \
275                        results['sobols'][qoi_k][(idx,)]
276
277        results = SCAnalysisResults(raw_data=results, samples=data_frame,
278                                    qois=qoi_cols, inputs=list(self.sampler.vary.get_keys()))
279        results.surrogate_ = self.surrogate
280        return results

Perform SC analysis on input data_frame.

Parameters
  • data_frame (pandas.DataFrame): Input data for analysis.
Returns
  • dict: Results dictionary with sub-dicts with keys: ['statistical_moments', 'sobols_first', 'sobols']. Each dict has an entry for each item in qoi_cols.
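
A minimal sketch of the usual call sequence, assuming the standard EasyVVUQ campaign workflow (the campaign, sampler and QoI name 'f' are illustrative):

    import easyvvuq as uq

    # attach the analysis element to a campaign whose runs have been collated
    analysis = uq.analysis.SCAnalysis(sampler=sampler, qoi_cols=['f'])
    campaign.apply_analysis(analysis)

    # query the resulting SCAnalysisResults object
    results = campaign.get_last_analysis()
    mean = results.describe('f', 'mean')
    var = results.describe('f', 'var')
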
def compute_comb_coef(self, **kwargs):
282    def compute_comb_coef(self, **kwargs):
283        """Compute general combination coefficients. These are the coefficients
284        multiplying the tensor products associated to each multi index l,
285        see page 12 Gerstner & Griebel, numerical integration using sparse grids
286        """
287
288        if 'l_norm' in kwargs:
289            l_norm = kwargs['l_norm']
290        else:
291            l_norm = self.l_norm
292
293        comb_coef = {}
294        logging.debug('Computing combination coefficients...')
295        for k in l_norm:
296            coef = 0.0
297            # for every k, subtract all multi indices
298            for l in l_norm:
299                z = l - k
300                # if the results contains only 0's and 1's, then z is the
301                # vector that can be formed from a tensor product of unit vectors
302                # for which k+z is in self.l_norm
303                if np.array_equal(z, z.astype(bool)):
304                    coef += (-1)**(np.sum(z))
305            comb_coef[tuple(k)] = coef
306        logging.debug('done')
307        return comb_coef

Compute general combination coefficients. These are the coefficients multiplying the tensor products associated to each multi index l, see page 12 Gerstner & Griebel, numerical integration using sparse grids
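
A self-contained sketch of the same rule on a toy set of multi indices (pure numpy, independent of the class; the index set is illustrative):

    import numpy as np

    # accepted multi indices of a small 2D sparse grid
    l_norm = np.array([[1, 1], [1, 2], [2, 1]])

    comb_coef = {}
    for k in l_norm:
        coef = 0.0
        for l in l_norm:
            z = l - k
            # z may only contain 0's and 1's: l lies a unit step "above" k
            if np.array_equal(z, z.astype(bool)):
                coef += (-1) ** np.sum(z)
        comb_coef[tuple(k)] = coef

    # comb_coef == {(1, 1): -1.0, (1, 2): 1.0, (2, 1): 1.0}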

def adapt_dimension(self, qoi, data_frame, store_stats_history=True, method='surplus', **kwargs):
309    def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
310                        method='surplus', **kwargs):
311        """Compute the adaptation metric and decide which of the admissible
312        level indices to include in the next iteration of the sparse grid. The
313        adaptation metric is based on the hierarchical surplus, defined as the
314        difference between the new code values of the admissible level indices,
315        and the SC surrogate of the previous iteration. Alternatively, it can be
316        based on the difference between the output mean of the current level,
317        and the mean computed with one extra admissible index.
318
319        This subroutine must be called AFTER the code is evaluated at
320        the new points, but BEFORE the analysis is performed.
321
322        Parameters
323        ----------
324        qoi : string
325            the name of the quantity of interest which is used
326            to base the adaptation metric on.
327        data_frame : pandas.DataFrame
328        store_stats_history : bool
329            store the mean and variance at each refinement in self.mean_history
330            and self.std_history. Used for checking convergence in the statistics
331            over the refinement iterations
332        method : string
333            name of the refinement error, default is 'surplus'. In this case the
334            error is based on the hierarchical surplus, which is an interpolation
335            based error. Another possibility is 'var',
336            in which case the error is based on the difference in the
337            variance between the current estimate and the estimate obtained
338            when a particular candidate direction is added.
339        """
340        logging.debug('Refining sampling plan...')
341        # load the code samples
342        samples = []
343        if isinstance(data_frame, pd.DataFrame):
344            for run_id in data_frame[('run_id', 0)].unique():
345                values = data_frame.loc[data_frame[('run_id', 0)] == run_id][qoi].values
346                samples.append(values.flatten())
347
348        if method == 'var':
349            all_idx = np.concatenate((self.l_norm, self.sampler.admissible_idx))
350            self.xi_1d = self.sampler.xi_1d
351            self.wi_1d = self.sampler.wi_1d
352            self.pce_coefs[qoi] = self.SC2PCE(samples, qoi, verbose=True, l_norm=all_idx,
353                                              xi_d=self.sampler.xi_d)
354            _, var_l, _ = self.get_pce_stats(self.l_norm, self.pce_coefs[qoi], self.comb_coef)
355
356        # the currently accepted grid points
357        xi_d_accepted = self.sampler.generate_grid(self.l_norm)
358
359        # compute the hierarchical surplus based error for every admissible l
360        error = {}
361        for l in self.sampler.admissible_idx:
362            error[tuple(l)] = []
363            # compute the error based on the hierarchical surplus (interpolation based)
364            if method == 'surplus':
365                # collocation points of current level index l
366                X_l = [self.sampler.xi_1d[n][l[n]] for n in range(self.N)]
367                X_l = np.array(list(product(*X_l)))
368                # only consider new points, subtract the accepted points
369                X_l = setdiff2d(X_l, xi_d_accepted)
370                for xi in X_l:
371                    # find the location of the current xi in the global grid
372                    idx = np.where((xi == self.sampler.xi_d).all(axis=1))[0][0]
373                    # hierarchical surplus error at xi
374                    hier_surplus = samples[idx] - self.surrogate(qoi, xi)
375                    if 'index' in kwargs:
376                        hier_surplus = hier_surplus[kwargs['index']]
377                        error[tuple(l)].append(np.abs(hier_surplus))
378                    else:
379                        error[tuple(l)].append(np.linalg.norm(hier_surplus, np.inf))
380                # compute mean error over all points in X_l
381                error[tuple(l)] = np.mean(error[tuple(l)])
382            # compute the error based on quadrature of the variance
383            elif method == 'var':
384                # create a candidate set of multi indices by adding the current
385                # admissible index to l_norm
386                candidate_l_norm = np.concatenate((self.l_norm, l.reshape([1, self.N])))
387                # now we must recompute the combination coefficients
388                c_l = self.compute_comb_coef(l_norm=candidate_l_norm)
389                _, var_candidate_l, _ = self.get_pce_stats(
390                    candidate_l_norm, self.pce_coefs[qoi], c_l)
391                # error in var
392                error[tuple(l)] = np.linalg.norm(var_candidate_l - var_l, np.inf)
393            else:
394                logging.debug('Specified refinement method %s not recognized' % method)
395                logging.debug('Accepted are surplus or var')
396                import sys
397                sys.exit()
398
399        for key in error.keys():
400            # logging.debug("Surplus error when l = %s is %s" % (key, error[key]))
401            logging.debug("Refinement error for l = %s is %s" % (key, error[key]))
402        # find the admissible index with the largest error
403        l_star = np.array(max(error, key=error.get)).reshape([1, self.N])
404        # logging.debug('Selecting %s for refinement.' % l_star)
405        logging.debug('Selecting %s for refinement.' % l_star)
406        # add max error to list
407        self.adaptation_errors.append(max(error.values()))
408
409        # add l_star to the current accepted level indices
410        self.l_norm = np.concatenate((self.l_norm, l_star))
411        # if someone executes this function twice for some reason,
412        # remove the duplicate l_star entry. Keep order unaltered
413        idx = np.unique(self.l_norm, axis=0, return_index=True)[1]
414        self.l_norm = np.array([self.l_norm[i] for i in sorted(idx)])
415
416        # perform the analyse step, but do not compute moments and Sobols
417        self.analyse(data_frame, compute_moments=False, compute_Sobols=False)
418
419        # if True store the mean and variance at each iteration of the adaptive
420        # algorithm
421        if store_stats_history:
422            # mean_f, var_f = self.get_moments(qoi)
423            logging.debug('Storing moments of iteration %d' % self.sampler.nadaptations)
424            pce_coefs = self.SC2PCE(samples, qoi, verbose=True)
425            mean_f, var_f, _ = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
426            self.mean_history.append(mean_f)
427            self.std_history.append(var_f)
428            logging.debug('done')

Compute the adaptation metric and decide which of the admissible level indices to include in the next iteration of the sparse grid. The adaptation metric is based on the hierarchical surplus, defined as the difference between the new code values of the admissible level indices, and the SC surrogate of the previous iteration. Alternatively, it can be based on the difference between the output mean of the current level, and the mean computed with one extra admissible index.

This subroutine must be called AFTER the code is evaluated at the new points, but BEFORE the analysis is performed.

Parameters
  • qoi (string): the name of the quantity of interest which is used to base the adaptation metric on.
  • data_frame (pandas.DataFrame): input data containing the code output at the newly evaluated points.
  • store_stats_history (bool): store the mean and variance at each refinement in self.mean_history and self.std_history. Used for checking convergence in the statistics over the refinement iterations
  • method (string): name of the refinement error, default is 'surplus'. In this case the error is based on the hierarchical surplus, which is an interpolation based error. Another possibility is 'var', in which case the error is based on the difference in the variance between the current estimate and the estimate obtained when a particular candidate direction is added.
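
A sketch of one refinement iteration; the calls outside this class (look_ahead, execute, collate, get_collation_result) follow the usual EasyVVUQ dimension-adaptive workflow and are assumptions here:

    # let the sampler propose admissible forward neighbours of the accepted grid
    sampler.look_ahead(analysis.l_norm)

    # evaluate the code at the newly created sample points
    campaign.execute().collate()

    # score the admissible indices and accept the one with the largest error
    data_frame = campaign.get_collation_result()
    analysis.adapt_dimension('f', data_frame, method='surplus')
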
def merge_accepted_and_admissible(self, level=0, **kwargs):
430    def merge_accepted_and_admissible(self, level=0, **kwargs):
431        """In the case of the dimension-adaptive sampler, there are 2 sets of
432        quadrature multi indices. There are the accepted indices that are actually
433        used in the analysis, and the admissible indices, of which some might
434        move to the accepted set in subsequent iterations. This subroutine merges
435        the two sets of multi indices by moving all admissible to the set of
436        accepted indices.
437        Do this at the end, when no more refinements will be executed. The
438        samples related to the admissible indices are already computed, although
439        not used in the analysis. By executing this subroutine at the very end, all
440        computed samples are used during the final postprocessing stage. Execute
441        campaign.apply_analysis to let the new set of indices take effect.
442        If further refinements are executed after all via sampler.look_ahead, the
443        number of new admissible samples to be computed can be very high,
444        especially in high dimensions. It is possible to undo the merge via
445        analysis.undo_merge before new refinements are made. Execute
446        campaign.apply_analysis again to let the old set of indices take effect.
447        """
448
449        if 'include' in kwargs:
450            include = kwargs['include']
451        else:
452            include = np.arange(self.N)
453
454        if self.sampler.dimension_adaptive:
455            logging.debug('Moving admissible indices to the accepted set...')
456            # make a backup of l_norm, such that undo_merge can revert back
457            self.l_norm_backup = np.copy(self.l_norm)
458            # merge admissible and accepted multi indices
459            if level == 0:
460                merged_l = np.concatenate((self.l_norm, self.sampler.admissible_idx))
461            else:
462                admissible_idx = []
463                count = 0
464                for l in self.sampler.admissible_idx:
465                    L = np.sum(l) - self.N + 1
466                    tmp = np.where(l == L)[0]
467                    if L <= level and np.in1d(tmp, include)[0]:
468                        admissible_idx.append(l)
469                        count += 1
470                admissible_idx = np.array(admissible_idx).reshape([count, self.N])
471                merged_l = np.concatenate((self.l_norm, admissible_idx))
472            # make sure final result contains only unique indices and store
473            # results in l_norm
474            idx = np.unique(merged_l, axis=0, return_index=True)[1]
475            # return np.array([merged_l[i] for i in sorted(idx)])
476            self.l_norm = np.array([merged_l[i] for i in sorted(idx)])
477            logging.debug('done')

In the case of the dimension-adaptive sampler, there are 2 sets of quadrature multi indices. There are the accepted indices that are actually used in the analysis, and the admissible indices, of which some might move to the accepted set in subsequent iterations. This subroutine merges the two sets of multi indices by moving all admissible to the set of accepted indices. Do this at the end, when no more refinements will be executed. The samples related to the admissible indices are already computed, although not used in the analysis. By executing this subroutine at the very end, all computed samples are used during the final postprocessing stage. Execute campaign.apply_analysis to let the new set of indices take effect. If further refinements are executed after all via sampler.look_ahead, the number of new admissible samples to be computed can be very high, especially in high dimensions. It is possible to undo the merge via analysis.undo_merge before new refinements are made. Execute campaign.apply_analysis again to let the old set of indices take effect.
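
A short usage sketch following the description above (the campaign object is assumed to be the EasyVVUQ campaign that owns this element):

    # after the final refinement: fold the already-computed admissible samples in
    analysis.merge_accepted_and_admissible()
    campaign.apply_analysis(analysis)

    # if further refinement is needed after all, revert to the accepted set
    analysis.undo_merge()
    campaign.apply_analysis(analysis)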

def undo_merge(self):
479    def undo_merge(self):
480        """This reverses the effect of the merge_accepted_and_admissible subroutine.
481        Execute if further refinements are required after all.
482        """
483        if self.sampler.dimension_adaptive:
484            self.l_norm = self.l_norm_backup
485            logging.debug('Restored old multi indices.')

This reverses the effect of the merge_accepted_and_admissible subroutine. Execute if further refinements are required after all.

def get_adaptation_errors(self):
487    def get_adaptation_errors(self):
488        """Returns self.adaptation_errors
489        """
490        return self.adaptation_errors

Returns self.adaptation_errors

def plot_stat_convergence(self):
492    def plot_stat_convergence(self):
493        """Plots the convergence of the statistical mean and std dev over the different
494        refinements in a dimension-adaptive setting. Specifically the inf norm
495        of the difference between the stats of iteration i and iteration i-1
496        is plotted.
497        """
498        if not self.dimension_adaptive:
499            logging.debug('Only works for the dimension adaptive sampler.')
500            return
501
502        K = len(self.mean_history)
503        if K < 2:
504            logging.debug('Means from at least two refinements are required')
505            return
506        else:
507            differ_mean = np.zeros(K - 1)
508            differ_std = np.zeros(K - 1)
509            for i in range(1, K):
510                differ_mean[i - 1] = np.linalg.norm(self.mean_history[i] -
511                                                    self.mean_history[i - 1], np.inf)
512                # make relative
513                differ_mean[i - 1] = differ_mean[i - 1] / np.linalg.norm(self.mean_history[i - 1],
514                                                                         np.inf)
515
516                differ_std[i - 1] = np.linalg.norm(self.std_history[i] -
517                                                   self.std_history[i - 1], np.inf)
518                # make relative
519                differ_std[i - 1] = differ_std[i - 1] / np.linalg.norm(self.std_history[i - 1],
520                                                                       np.inf)
521
522        import matplotlib.pyplot as plt
523        fig = plt.figure('stat_conv')
524        ax1 = fig.add_subplot(111, title='moment convergence')
525        ax1.set_xlabel('iteration', fontsize=12)
526        # ax1.set_ylabel(r'$ ||\mathrm{mean}_i - \mathrm{mean}_{i - 1}||_\infty$',
527        # color='r', fontsize=12)
528        ax1.set_ylabel(r'relative error mean', color='r', fontsize=12)
529        ax1.plot(range(2, K + 1), differ_mean, color='r', marker='+')
530        ax1.tick_params(axis='y', labelcolor='r')
531
532        ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
533
534        # ax2.set_ylabel(r'$ ||\mathrm{var}_i - \mathrm{var}_{i - 1}||_\infty$',
535        # color='b', fontsize=12)
536        ax2.set_ylabel(r'relative error variance', fontsize=12, color='b')
537        ax2.plot(range(2, K + 1), differ_std, color='b', marker='*')
538        ax2.tick_params(axis='y', labelcolor='b')
539
540        plt.tight_layout()
541        plt.show()

Plots the convergence of the statistical mean and std dev over the different refinements in a dimension-adaptive setting. Specifically the inf norm of the difference between the stats of iteration i and iteration i-1 is plotted.

def surrogate(self, qoi, x, L=None):
543    def surrogate(self, qoi, x, L=None):
544        """Use sc_expansion UQP as a surrogate
545
546        Parameters
547        ----------
548        qoi : str
549            name of the qoi
550        x : array
551            location at which to evaluate the surrogate
552        L : int
553            level of the (sparse) grid, default = self.L
554
555        Returns
556        -------
557        the interpolated value of qoi at x (float, (N_qoi,))
558        """
559
560        return self.sc_expansion(self.samples[qoi], x=x)

Use sc_expansion UQP as a surrogate

Parameters
  • qoi (str): name of the qoi
  • x (array): location at which to evaluate the surrogate
  • L (int): level of the (sparse) grid, default = self.L
Returns
  • the interpolated value of qoi at x (float, (N_qoi,))
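
For example, evaluating the surrogate of a QoI named 'f' (name and values illustrative) at a single point in the input space:

    import numpy as np

    x = np.array([0.15, 0.8])   # one point for N = 2 uncertain inputs
    f_at_x = analysis.surrogate('f', x)
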
def quadrature(self, qoi, samples=None):
562    def quadrature(self, qoi, samples=None):
563        """Computes a (Smolyak) quadrature
564
565        Parameters
566        ----------
567        qoi : str
568            name of the qoi
569
570        samples: array
571            compute the mean by setting samples = self.samples.
572            To compute the variance, set samples = (self.samples - mean)**2
573
574        Returns
575        -------
576        the quadrature of qoi
577        """
578        if samples is None:
579            samples = self.samples[qoi]
580
581        return self.combination_technique(qoi, samples)

Computes a (Smolyak) quadrature

Parameters
  • qoi (str): name of the qoi
  • samples (array): compute the mean by setting samples = self.samples. To compute the variance, set samples = (self.samples - mean)**2
Returns
  • the quadrature of qoi
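
As the samples parameter suggests, the mean and variance of a QoI 'f' (illustrative name) follow from two quadratures:

    # mean: integrate the raw code samples
    mean_f = analysis.quadrature('f')

    # variance: integrate the squared deviation from the mean
    var_samples = [(sample - mean_f) ** 2 for sample in analysis.samples['f']]
    var_f = analysis.quadrature('f', samples=var_samples)
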
def combination_technique(self, qoi, samples=None, **kwargs):
583    def combination_technique(self, qoi, samples=None, **kwargs):
584        """Efficient quadrature formulation for (sparse) grids. See:
585
586            Gerstner, Griebel, "Numerical integration using sparse grids"
587            Uses the general combination technique (page 12).
588
589        Parameters
590        ----------
591        qoi : str
592            name of the qoi
593
594        samples : array
595            compute the mean by setting samples = self.samples.
596            To compute the variance, set samples = (self.samples - mean)**2
597        """
598
599        if samples is None:
600            samples = self.samples[qoi]
601
602        # In the case of quadrature-based refinement, we need to specify
603        # l_norm, comb_coef and xi_d other than the current default values
604        if 'l_norm' in kwargs:
605            l_norm = kwargs['l_norm']
606        else:
607            l_norm = self.l_norm
608
609        if 'comb_coef' in kwargs:
610            comb_coef = kwargs['comb_coef']
611        else:
612            comb_coef = self.comb_coef
613
614        if 'xi_d' in kwargs:
615            xi_d = kwargs['xi_d']
616        else:
617            xi_d = self.xi_d
618
619        # quadrature Q
620        Q = 0.0
621
622        # loop over l
623        for l in l_norm:
624            # compute the tensor product of parameter and weight values
625            X_k = [self.xi_1d[n][l[n]] for n in range(self.N)]
626            W_k = [self.wi_1d[n][l[n]] for n in range(self.N)]
627
628            X_k = np.array(list(product(*X_k)))
629            W_k = np.array(list(product(*W_k)))
630            W_k = np.prod(W_k, axis=1)
631            W_k = W_k.reshape([W_k.shape[0], 1])
632
633            # scaling factor of combination technique
634            W_k = W_k * comb_coef[tuple(l)]
635
636            # find corresponding code values
637            f_k = np.array([samples[np.where((x == xi_d).all(axis=1))[0][0]] for x in X_k])
638
639            # quadrature of Q^1_{k1} X ... X Q^1_{kN} product
640            Q = Q + np.sum(f_k * W_k, axis=0).T
641
642        return Q

Efficient quadrature formulation for (sparse) grids. See:

Gerstner, Griebel, "Numerical integration using sparse grids"
Uses the general combination technique (page 12).
Parameters
  • qoi (str): name of the qoi
  • samples (array): compute the mean by setting samples = self.samples. To compute the variance, set samples = (self.samples - mean)**2
def get_moments(self, qoi):
644    def get_moments(self, qoi):
645        """
646        Parameters
647        ----------
648        qoi : str
649            name of the qoi
650
651        Returns
652        -------
653        mean and variance of qoi (float (N_qoi,))
654        """
655        logging.debug('Computing moments...')
656        # compute mean
657        mean_f = self.quadrature(qoi)
658        # compute variance
659        variance_samples = [(sample - mean_f)**2 for sample in self.samples[qoi]]
660        var_f = self.quadrature(qoi, samples=variance_samples)
661        logging.debug('done')
662        return mean_f, var_f
Parameters
  • qoi (str): name of the qoi
Returns
  • mean and variance of qoi (float (N_qoi,))
def sc_expansion(self, samples, x):
664    def sc_expansion(self, samples, x):
665        """
666        Non recursive implementation of the SC expansion. Performs interpolation
667        of code output samples for both full and sparse grids.
668
669        Parameters
670        ----------
671        samples : list
672            list of code output samples.
673        x : array
674            One or more locations in stochastic space at which to evaluate
675            the surrogate.
676
677        Returns
678        -------
679        surr : array
680            The interpolated values of the code output at input locations
681            specified by x.
682
683        """
684        # Computing the tensor grid of each multiindex l (xi_d below)
685        # every time is slow. Instead store it globally, and only recompute when
686        # self.l_norm has changed, when the flag init_interpolation = True.
687        # This flag is set to True when self.analyse is executed
688        if self.init_interpolation:
689            self.xi_d_per_l = {}
690            for l in self.l_norm:
691                # all points corresponding to l
692                xi = [self.xi_1d[n][l[n]] for n in range(self.N)]
693                self.xi_d_per_l[tuple(l)] = np.array(list(product(*xi)))
694            self.init_interpolation = False
695
696        surr = 0.0
697        for l in self.l_norm:
698            # all points corresponding to l
699            # xi = [self.xi_1d[n][l[n]] for n in range(self.N)]
700            # xi_d = np.array(list(product(*xi)))
701            xi_d = self.xi_d_per_l[tuple(l)]
702
703            for xi in xi_d:
704                # indices of current collocation point
705                # in corresponding 1d colloc points (self.xi_1d[n][l[n]])
706                # These are the j of the 1D lagrange polynomials l_j(x), see
707                # lagrange_poly subroutine
708                idx = [(self.xi_1d[n][l[n]] == xi[n]).nonzero()[0][0] for n in range(self.N)]
709                # index of the code sample
710                sample_idx = np.where((xi == self.xi_d).all(axis=1))[0][0]
711
712                # values of Lagrange polynomials at x
713                if x.ndim == 1:
714                    weight = [lagrange_poly(x[n], self.xi_1d[n][l[n]], idx[n])
715                              for n in range(self.N)]
716                    surr += self.comb_coef[tuple(l)] * samples[sample_idx] * np.prod(weight, axis=0)
717                # batch setting, if multiple x values are prescribed
718                else:
719                    weight = [lagrange_poly(x[:, n], self.xi_1d[n][l[n]], idx[n])
720                              for n in range(self.N)]
721                    surr += self.comb_coef[tuple(l)] * samples[sample_idx] * \
722                        np.prod(weight, axis=0).reshape([-1, 1])
723
724        return surr

Non recursive implementation of the SC expansion. Performs interpolation of code output samples for both full and sparse grids.

Parameters
  • samples (list): list of code output samples.
  • x (array): One or more locations in stochastic space at which to evaluate the surrogate.
Returns
  • surr (array): The interpolated values of the code output at input locations specified by x.
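
The routine also accepts a batch of evaluation points; a small sketch with illustrative values (QoI name 'f' assumed):

    import numpy as np

    # 10 points in an N = 2 dimensional input space
    x_batch = np.random.rand(10, 2)
    surr_batch = analysis.sc_expansion(analysis.samples['f'], x=x_batch)
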
def get_sample_array(self, qoi):
726    def get_sample_array(self, qoi):
727        """
728        Parameters
729        ----------
730        qoi : str
731            name of quantity of interest
732
733        Returns
734        -------
735        array of all samples of qoi
736        """
737        return np.array([self.samples[qoi][k] for k in range(len(self.samples[qoi]))])
Parameters
  • qoi (str): name of quantity of interest
Returns
  • array of all samples of qoi
def adaptation_histogram(self):
739    def adaptation_histogram(self):
740        """Plots a bar chart of the maximum order of the quadrature rule
741        that is used in each dimension. Use in case of the dimension adaptive
742        sampler to get an idea of which parameters were more refined than others.
743        This gives only a first-order idea, as it only plots the max quad
744        order independently per input parameter, so higher-order refinements
745        that were made do not show up in the bar chart.
746        """
747        import matplotlib.pyplot as plt
748
749        fig = plt.figure('adapt_hist', figsize=[4, 8])
750        ax = fig.add_subplot(111, ylabel='max quadrature order',
751                             title='Number of refinements = %d'
752                             % self.sampler.nadaptations)
753        # find max quad order for every parameter
754        adapt_measure = np.max(self.l_norm, axis=0)
755        ax.bar(range(adapt_measure.size), height=adapt_measure - 1)
756        params = list(self.sampler.vary.get_keys())
757        ax.set_xticks(range(adapt_measure.size))
758        ax.set_xticklabels(params)
759        plt.xticks(rotation=90)
760        plt.tight_layout()
761        plt.show()

Plots a bar chart of the maximum order of the quadrature rule that is used in each dimension. Use in case of the dimension adaptive sampler to get an idea of which parameters were more refined than others. This gives only a first-order idea, as it only plots the max quad order independently per input parameter, so higher-order refinements that were made do not show up in the bar chart.

def adaptation_table(self, **kwargs):
763    def adaptation_table(self, **kwargs):
764        """Plots a color-coded table of the quadrature-order refinement.
765        Shows in what order the parameters were refined, and unlike
766        adaptation_histogram, this also shows higher-order refinements.
767
768        Parameters
769        ----------
770        **kwargs: can contain kwarg 'order' to specify the order in which
771        the variables on the x axis are plotted (e.g. in order of decreasing
772        1st order Sobol index).
773
774        Returns
775        -------
776        None.
777
778        """
779
780        # if specified, plot the variables on the x axis in a given order
781        if 'order' in kwargs:
782            order = kwargs['order']
783        else:
784            order = range(self.N)
785
786        l = np.copy(self.l_norm)[:, order]
787
788        import matplotlib as mpl
789        import matplotlib.pyplot as plt
790
791        fig = plt.figure(figsize=[12, 6])
792        ax = fig.add_subplot(111)
793
794        # max quad order
795        M = np.max(l)
796        cmap = plt.get_cmap('Purples', M)
797        # plot 'heat map' of refinement
798        im = plt.imshow(l.T, cmap=cmap, aspect='auto')
799        norm = mpl.colors.Normalize(vmin=0, vmax=M - 1)
800        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
801        sm.set_array([])
802        cb = plt.colorbar(im)
803        # plot the quad order in the middle of the colorbar intervals
804        p = np.linspace(1, M, M+1)
805        tick_p = 0.5 * (p[1:] + p[0:-1])
806        cb.set_ticks(tick_p)
807        cb.set_ticklabels(np.arange(M))
808        cb.set_label(r'quadrature order')
809        # plot the variables names on the x axis
810        ax.set_yticks(range(l.shape[1]))
811        params = np.array(list(self.sampler.vary.get_keys()))
812        ax.set_yticklabels(params[order], fontsize=12)
813        # ax.set_yticks(range(l.shape[0]))
814        ax.set_xlabel('iteration')
815        # plt.yticks(rotation=90)
816        plt.tight_layout()
817        plt.show()

Plots a color-coded table of the quadrature-order refinement. Shows in what order the parameters were refined, and unlike adaptation_histogram, this also shows higher-order refinements.

Parameters
  • **kwargs: can contain kwarg 'order' to specify the order in which the variables on the x axis are plotted (e.g. in order of decreasing 1st order Sobol index).
Returns
  • None.
def plot_grid(self):
819    def plot_grid(self):
820        """Plots the collocation points for 2 and 3 dimensional problems
821        """
822        import matplotlib.pyplot as plt
823
824        if self.N == 2:
825            fig = plt.figure()
826            ax = fig.add_subplot(111, xlabel=r'$x_1$', ylabel=r'$x_2$')
827            ax.plot(self.xi_d[:, 0], self.xi_d[:, 1], 'ro')
828            plt.tight_layout()
829            plt.show()
830        elif self.N == 3:
831            from mpl_toolkits.mplot3d import Axes3D
832            fig = plt.figure()
833            ax = fig.add_subplot(111, projection='3d', xlabel=r'$x_1$',
834                                 ylabel=r'$x_2$', zlabel=r'$x_3$')
835            ax.scatter(self.xi_d[:, 0], self.xi_d[:, 1], self.xi_d[:, 2])
836            plt.tight_layout()
837            plt.show()
838        else:
839            logging.debug('Will only plot for N = 2 or N = 3.')

Plots the collocation points for 2 and 3 dimensional problems

def SC2PCE(self, samples, qoi, verbose=True, **kwargs):
841    def SC2PCE(self, samples, qoi, verbose=True, **kwargs):
842        """Computes the Polynomial Chaos Expansion coefficients from the SC
843        expansion via a transformation of basis (Lagrange polynomial basis -->
844        orthonormal basis).
845
846        Parameters
847        ----------
848        samples : array
849            SC code samples from which to compute the PCE coefficients
850
851        qoi : string
852            Name of the QoI.
853
854        Returns
855        -------
856        pce_coefs : dict
857            PCE coefficients per multi index l
858        """
859        pce_coefs = {}
860
861        if 'l_norm' in kwargs:
862            l_norm = kwargs['l_norm']
863        else:
864            l_norm = self.l_norm
865
866        if 'xi_d' in kwargs:
867            xi_d = kwargs['xi_d']
868        else:
869            xi_d = self.xi_d
870
871        # if not hasattr(self, 'pce_coefs'):
872        #     self.pce_coefs = {}
873
874        count_l = 1
875        for l in l_norm:
876            if not tuple(l) in self.pce_coefs[qoi].keys():
877                # pce coefficients for current multi-index l
878                pce_coefs[tuple(l)] = {}
879
880                # 1d points generated by l
881                x_1d = [self.xi_1d[n][l[n]] for n in range(self.N)]
882                # 1d Lagrange polynomials generated by l
883                # EDIT: do not use chaospy for Lagrange, converts lagrange into monomial, requires
884                # Vandermonde matrix inversion to find coefficients, which becomes
885                # very ill conditioned rather quickly. Can no longer use cp.E to compute
886                # integrals, use GQ instead
887                # a_1d = [cp.lagrange_polynomial(sampler.xi_1d[n][l[n]]) for n in range(d)]
888
889                # N-dimensional grid generated by l
890                x_l = np.array(list(product(*x_1d)))
891
892                # all multi indices of the PCE expansion: k <= l
893                k_norm = list(product(*[np.arange(1, l[n] + 1) for n in range(self.N)]))
894
895                if verbose:
896                    logging.debug('Computing PCE coefficients %d / %d' % (count_l, l_norm.shape[0]))
897
898                for k in k_norm:
899                    # product of the PCE basis function of order k - 1 and all
900                    # Lagrange basis functions in a_1d, per dimension
901                    # [[phi_k[0]*a_1d[0]], ..., [phi_k[N-1]*a_1d[N-1]]]
902
903                    # orthogonal polynomial generated by chaospy
904                    phi_k = [cp.expansion.stieltjes(k[n] - 1,
905                                                    dist=self.sampler.params_distribution[n],
906                                                    normed=True)[-1] for n in range(self.N)]
907
908                    # the polynomial order of each integrand phi_k*a_j = (k - 1) + (number of
909                    # colloc. points - 1)
910                    orders = [(k[n] - 1) + (self.xi_1d[n][l[n]].size - 1) for n in range(self.N)]
911
912                    # will hold the products of PCE basis functions phi_k and lagrange
913                    # interpolation polynomials a_1d
914                    cross_prod = []
915
916                    for n in range(self.N):
917                        # GQ using n points can exactly integrate polynomials of order 2n-1:
918                        # solve for required number of points n when given order
919                        n_quad_points = int(np.ceil((orders[n] + 1) / 2))
920
921                        # generate Gaussian quad rule
922                        if isinstance(self.sampler.params_distribution[n], cp.DiscreteUniform):
923                            xi = self.xi_1d[n][l[n]]
924                            wi = self.wi_1d[n][l[n]]
925                        else:
926                            xi, wi = cp.generate_quadrature(
927                                n_quad_points - 1, self.sampler.params_distribution[n], rule="G")
928                            xi = xi[0]
929
930                        # number of colloc points = number of Lagrange polynomials
931                        n_lagrange_poly = int(self.xi_1d[n][l[n]].size)
932
933                        # compute the v coefficients = coefficients of SC2PCE mapping
934                        v_coefs_n = []
935                        for j in range(n_lagrange_poly):
936                            # compute values of Lagrange polys at quadrature points
937                            l_j = np.array([lagrange_poly(xi[i], self.xi_1d[n][l[n]], j)
938                                            for i in range(xi.size)])
939                            # each coef is the integral of the lagrange poly times the current
940                            # orthogonal PCE poly
941                            v_coefs_n.append(np.sum(l_j * phi_k[n](xi) * wi))
942                        cross_prod.append(v_coefs_n)
943
944                    # tensor product of all integrals
945                    integrals = np.array(list(product(*cross_prod)))
946                    # multiply over the number of parameters: v_prod = v_k1_j1 * ... * v_kd_jd
947                    v_prod = np.prod(integrals, axis=1)
948                    v_prod = v_prod.reshape([v_prod.size, 1])
949
950                    # find corresponding code values
951                    f_k = np.array([samples[np.where((x == xi_d).all(axis=1))[0][0]] for x in x_l])
952
953                    # the sum of all code sample * v_{k,j_1} * ... * v_{k,j_N}
954                    # equals the PCE coefficient
955                    eta_k = np.sum(f_k * v_prod, axis=0).T
956
957                    pce_coefs[tuple(l)][tuple(k)] = eta_k
958            else:
959                # pce coefs previously computed, just copy result
960                pce_coefs[tuple(l)] = self.pce_coefs[qoi][tuple(l)]
961            count_l += 1
962
963        logging.debug('done')
964        return pce_coefs

Computes the Polynomial Chaos Expansion coefficients from the SC expansion via a transformation of basis (Lagrange polynomial basis --> orthonormal basis).

Parameters
  • samples (array): SC code samples from which to compute the PCE coefficients
  • qoi (string): Name of the QoI.
Returns
  • pce_coefs (dict): PCE coefficients per multi index l
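
A direct call, reusing the samples stored by analyse (QoI name illustrative; for sparse grids analyse already populates self.pce_coefs this way):

    pce_coefs = analysis.SC2PCE(analysis.samples['f'], 'f')
    # pce_coefs[tuple(l)][tuple(k)] holds the coefficient eta_k for multi index l
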
def generalized_pce_coefs(self, l_norm, pce_coefs, comb_coef):
 966    def generalized_pce_coefs(self, l_norm, pce_coefs, comb_coef):
 967        """
 968        Computes the generalized PCE coefficients, defined as the linear combination
 969        of PCE coefficients which make it possible to write the dimension-adaptive
 970        PCE expansion in standard form. See DOI: 10.13140/RG.2.2.18085.58083/1
 971
 972        Parameters
 973        ----------
 974        l_norm : array
 975            array of quadrature order multi indices
 976        pce_coefs : dict
 977            dict of PCE coefficients computed by SC2PCE subroutine
 978        comb_coef : dict
 979            dict of combination coefficients computed by compute_comb_coef
 980
 981        Returns
 982        -------
 983        gen_pce_coefs : dict
 984            The generalized PCE coefficients, indexed per multi index.
 985
 986        """
 987        assert self.sparse, "Generalized PCE coefficients are computed only for sparse grids"
 988
 989        # the set of all forward neighbours of l: {k | k >= l}
 990        F_l = {}
 991        # the generalized PCE coefs, which turn the adaptive PCE into a standard PCE expansion
 992        gen_pce_coefs = {}
 993        for l in l_norm:
 994            # {indices of k | k >= l}
 995            idx = np.where((l <= l_norm).all(axis=1))[0]
 996            F_l[tuple(l)] = l_norm[idx]
 997
 998            # the generalized PCE coefs are comb_coef[k] * pce_coefs[k][l], summed over k
 999            # for a fixed l
1000            gen_pce_coefs[tuple(l)] = 0.0
1001            for k in F_l[tuple(l)]:
1002                gen_pce_coefs[tuple(l)] += comb_coef[tuple(k)] * pce_coefs[tuple(k)][tuple(l)]
1003
1004        return gen_pce_coefs

Computes the generalized PCE coefficients, defined as the linear combination of PCE coefficients which make it possible to write the dimension-adaptive PCE expansion in standard form. See DOI: 10.13140/RG.2.2.18085.58083/1

Parameters
  • l_norm (array): array of quadrature order multi indices
  • pce_coefs (dict): dict of PCE coefficients computed by SC2PCE subroutine
  • comb_coef (dict): dict of combination coefficients computed by compute_comb_coef
Returns
  • gen_pce_coefs (dict): The generalized PCE coefficients, indexed per multi index.
def get_pce_stats(self, l_norm, pce_coefs, comb_coef):
1006    def get_pce_stats(self, l_norm, pce_coefs, comb_coef):
1007        """Compute the mean and the variance based on the generalized PCE coefficients
1008        See DOI: 10.13140/RG.2.2.18085.58083/1
1009
1010        Parameters
1011        ----------
1012        l_norm : array
1013            array of quadrature order multi indices
1014        pce_coefs : dict
1015            dict of PCE coefficients computed by SC2PCE subroutine
1016        comb_coef : dict
1017            dict of combination coefficients computed by compute_comb_coef
1018
1019        Returns
1020        -------
1021        mean, variance and generalized pce coefficients
1022        """
1023
1024        gen_pce_coefs = self.generalized_pce_coefs(l_norm, pce_coefs, comb_coef)
1025
1026        # with the generalized pce coefs, the standard PCE formulas for the mean and var
1027        # can be used for the dimension-adaptive PCE
1028
1029        # the PCE mean is just the 1st generalized PCE coef
1030        l1 = tuple(np.ones(self.N, dtype=int))
1031        mean = gen_pce_coefs[l1]
1032
1033        # the variance is the sum of the squared generalized PCE coefs, excluding the 1st coef
1034        D = 0.0
1035        for l in l_norm[1:]:
1036            D += gen_pce_coefs[tuple(l)] ** 2
1037
1038        if type(D) is float:
1039            D = np.array([D])
1040
1041        return mean, D, gen_pce_coefs

Compute the mean and the variance based on the generalized PCE coefficients. See DOI: 10.13140/RG.2.2.18085.58083/1

Parameters
  • l_norm (array): array of quadrature order multi indices
  • pce_coefs (dict): dict of PCE coefficients computed by SC2PCE subroutine
  • comb_coef (dict): dict of combination coefficients computed by compute_comb_coef
Returns
  • mean, variance and generalized pce coefficients
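
Combined with SC2PCE and the combination coefficients, this yields the dimension-adaptive mean and variance (QoI name illustrative):

    pce_coefs = analysis.SC2PCE(analysis.samples['f'], 'f')
    mean_f, var_f, gen_pce_coefs = analysis.get_pce_stats(
        analysis.l_norm, pce_coefs, analysis.comb_coef)
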
def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
1043    def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
1044        """Computes Sobol indices using Polynomial Chaos coefficients. These
1045        coefficients are computed from the SC expansion via a transformation
1046        of basis (SC2PCE subroutine). This works better than computing the
1047        Sobol indices directly from the SC expansion in the case of the
1048        dimension-adaptive sampler. See DOI: 10.13140/RG.2.2.18085.58083/1
1049
1050        Method: J.D. Jakeman et al, "Adaptive multi-index collocation
1051        for uncertainty quantification and sensitivity analysis", 2019.
1052        (Page 18)
1053
1054        Parameters
1055        ----------
1056        qoi : str
1057            name of the Quantity of Interest for which to compute the indices
1058        typ : str
1059            Default = 'first_order'. 'all' is also possible
1060        **kwargs : dict
1061            if this contains 'samples', use these instead of the SC samples
1062            in the database
1063
1064        Returns
1065        -------
1066        Tuple
1067            mean: PCE mean
1068            D: PCE variance, D_u: partial variances
1069            S_u: PCE Sobol indices, either the first order indices or all indices
1070        """
1071
1072        if 'samples' in kwargs:
1073            samples = kwargs['samples']
1074            N_qoi = samples[0].size
1075        else:
1076            samples = self.samples[qoi]
1077            N_qoi = self.N_qoi[qoi]
1078
1079        # compute the (generalized) PCE coefficients and stats
1080        self.pce_coefs[qoi] = self.SC2PCE(samples, qoi)
1081        mean, D, gen_pce_coefs = self.get_pce_stats(
1082            self.l_norm, self.pce_coefs[qoi], self.comb_coef)
1083
1084        logging.debug('Computing Sobol indices...')
1085        # Universe = (0, 1, ..., N - 1)
1086        U = np.arange(self.N)
1087
1088        # the powerset of U for either the first order or all Sobol indices
1089        if typ == 'first_order':
1090            P = [()]
1091            for i in range(self.N):
1092                P.append((i,))
1093        else:
1094            # all indices u
1095            P = list(powerset(U))
1096
1097        # dict to hold the partial Sobol variances and Sobol indices
1098        D_u = {}
1099        S_u = {}
1100        for u in P[1:]:
1101            # complement of u
1102            u_prime = np.delete(U, u)
1103            k = []
1104            D_u[u] = np.zeros(N_qoi)
1105            S_u[u] = np.zeros(N_qoi)
1106
1107            # compute the set of multi indices corresponding to varying ONLY
1108            # the inputs indexed by u
1109            for l in self.l_norm:
1110                # assume l_i = 1 for all i in u' until found otherwise
1111                all_ones = True
1112                for i_up in u_prime:
1113                    if l[i_up] != 1:
1114                        all_ones = False
1115                        break
1116                # if l_i = 1 for all i in u'
1117                if all_ones:
1118                    # assume all l_i for i in u are > 1
1119                    all_gt_one = True
1120                    for i_u in u:
1121                        if l[i_u] == 1:
1122                            all_gt_one = False
1123                            break
1124                    # if both conditions above are True, the current l varies
1125                    # only inputs indexed by u, add this l to k
1126                    if all_gt_one:
1127                        k.append(l)
1128
1129            logging.debug('Multi indices of dimension %s are %s' % (u, k))
1130            # the partial variance of u is the sum of all variances index by k
1131            for k_u in k:
1132                D_u[u] = D_u[u] + gen_pce_coefs[tuple(k_u)] ** 2
1133
1134            # normalize D_u by total variance D to get the Sobol index
1135            if 0 in D:
1136                with warnings.catch_warnings():
1137                    # ignore divide by zero warning
1138                    warnings.simplefilter("ignore")
1139                    S_u[u] = D_u[u] / D
1140            else:
1141                S_u[u] = D_u[u] / D
1142
1143        logging.debug('done')
1144        return mean, D, D_u, S_u

Computes Sobol indices using Polynomial Chaos coefficients. These coefficients are computed from the SC expansion via a transformation of basis (SC2PCE subroutine). This works better than computing the Sobol indices directly from the SC expansion in the case of the dimension-adaptive sampler. See DOI: 10.13140/RG.2.2.18085.58083/1

Method: J.D. Jakeman et al, "Adaptive multi-index collocation for uncertainty quantification and sensitivity analysis", 2019. (Page 18)

Parameters
  • qoi (str): name of the Quantity of Interest for which to compute the indices
  • typ (str): Default = 'first_order'. 'all' is also possible
  • **kwargs (dict): if this contains 'samples', use these instead of the SC samples in the database
Returns
  • Tuple: mean (PCE mean), D (PCE variance), D_u (partial variances), and S_u (PCE Sobol indices, either the first order indices or all indices)
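
To see how the first-order indices follow from the generalized coefficients, the hedged sketch below reuses the toy 2D coefficients from earlier: the partial variance of input u sums the squared coefficients of multi indices that exceed 1 only in dimension u, and the Sobol index is that partial variance divided by the total variance.

    import numpy as np

    # toy generalized PCE coefficients and accepted multi indices (2 inputs)
    gen_pce_coefs = {(1, 1): 0.5, (2, 1): 0.1, (1, 2): 0.2}
    l_norm = np.array([[1, 1], [2, 1], [1, 2]])
    N = 2

    # total variance: sum of squared coefficients, constant term excluded
    D = sum(gen_pce_coefs[tuple(l)] ** 2 for l in l_norm[1:])

    S_u = {}
    for u in range(N):
        # multi indices that are > 1 in dimension u and equal to 1 everywhere else
        k = [l for l in l_norm
             if l[u] > 1 and all(l[j] == 1 for j in range(N) if j != u)]
        D_u = sum(gen_pce_coefs[tuple(l)] ** 2 for l in k)
        S_u[(u,)] = D_u / D

    print(S_u)  # roughly {(0,): 0.2, (1,): 0.8}
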
@staticmethod
def compute_tensor_prod_u(xi, wi, u, u_prime):
1148    @staticmethod
1149    def compute_tensor_prod_u(xi, wi, u, u_prime):
1150        """
1151        Calculate tensor products of weights and collocation points
1152        with dimension of u and u'
1153
1154        Parameters
1155        ----------
1156        xi (array of floats): 1D collocation points
1157        wi (array of floats): 1D quadrature weights
1158        u  (array of int): dimensions
1159        u_prime (array of int): remaining dimensions (u union u' = range(N))
1160
1161        Returns
1162        -------
1163        Tensor products of points and weights for dimensions u and u'
1164
1165        """
1166
1167        # tensor products with dimension of u
1168        xi_u = [xi[key] for key in u]
1169        wi_u = [wi[key] for key in u]
1170
1171        xi_d_u = np.array(list(product(*xi_u)))
1172        wi_d_u = np.array(list(product(*wi_u)))
1173
1174        # tensor products with dimension of u' (complement of u)
1175        xi_u_prime = [xi[key] for key in u_prime]
1176        wi_u_prime = [wi[key] for key in u_prime]
1177
1178        xi_d_u_prime = np.array(list(product(*xi_u_prime)))
1179        wi_d_u_prime = np.array(list(product(*wi_u_prime)))
1180
1181        return xi_d_u, wi_d_u, xi_d_u_prime, wi_d_u_prime

Calculate tensor products of weights and collocation points with dimension of u and u'

Parameters
  • xi (array of floats): 1D collocation points
  • wi (array of floats): 1D quadrature weights
  • u (array of int): dimensions
  • u_prime (array of int): remaining dimensions (u union u' = range(N))
Returns
  • Tensor products of points and weights for dimensions u and u'
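
A small self-contained sketch of the tensor-product step, with made-up 1D points and weights for two dimensions; itertools.product builds the grids in the same way as the method above.

    import numpy as np
    from itertools import product

    # hypothetical 1D collocation points and quadrature weights per dimension
    xi = [np.array([0.0, 0.5, 1.0]), np.array([0.0, 1.0])]
    wi = [np.array([0.25, 0.5, 0.25]), np.array([0.5, 0.5])]

    u, u_prime = [0], [1]  # keep dimension 0, integrate over dimension 1

    xi_d_u = np.array(list(product(*[xi[k] for k in u])))
    wi_d_u = np.array(list(product(*[wi[k] for k in u])))
    xi_d_u_prime = np.array(list(product(*[xi[k] for k in u_prime])))
    wi_d_u_prime = np.array(list(product(*[wi[k] for k in u_prime])))

    print(xi_d_u.shape, xi_d_u_prime.shape)  # (3, 1) (2, 1)
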
def compute_marginal(self, qoi, u, u_prime, diff):
1183    def compute_marginal(self, qoi, u, u_prime, diff):
1184        """
1185        Computes a marginal integral of the qoi(x) over the dimension defined
1186        by u_prime, for every x value in dimensions u
1187
1188        Parameters
1189        ----------
1190        - qoi (str): name of the quantity of interest
1191        - u (array of int): dimensions which are not integrated
1192        - u_prime (array of int): dimensions which are integrated
1193        - diff (array of int): levels
1194
1195        Returns
1196        -------
1197        - Values of the marginal integral h and the weights of dimensions u
1198
1199        """
1200        # 1d weights and points of the levels in diff
1201        xi = [self.xi_1d[n][np.abs(diff)[n]] for n in range(self.N)]
1202        wi = [self.wi_1d[n][np.abs(diff)[n]] for n in range(self.N)]
1203
1204        # compute tensor products and weights in dimension u and u'
1205        xi_d_u, wi_d_u, xi_d_u_prime, wi_d_u_prime =\
1206            self.compute_tensor_prod_u(xi, wi, u, u_prime)
1207
1208        idxs = np.argsort(np.concatenate((u, u_prime)))
1209        # marginals h = f*w' integrated over u', so cardinality is that of u
1210        h = [0.0] * xi_d_u.shape[0]
1211        for i_u, xi_d_u_ in enumerate(xi_d_u):
1212            for i_up, xi_d_u_prime_ in enumerate(xi_d_u_prime):
1213                xi_s = np.concatenate((xi_d_u_, xi_d_u_prime_))[idxs]
1214                # find the index of the corresponding code sample
1215                idx = np.where(np.prod(self.xi_d == xi_s, axis=1))[0][0]
1216                # perform quadrature
1217                q_k = self.samples[qoi][idx]
1218                h[i_u] += q_k * wi_d_u_prime[i_up].prod()
1219        # return marginal and the weights of dimensions u
1220        return h, wi_d_u

Computes a marginal integral of the qoi(x) over the dimension defined by u_prime, for every x value in dimensions u

Parameters
  • qoi (str): name of the quantity of interest
  • u (array of int): dimensions which are not integrated
  • u_prime (array of int): dimensions which are integrated
  • diff (array of int): levels
Returns
  • Values of the marginal integral and the weights of dimensions u
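
A hedged, standalone sketch of the quadrature loop: for every grid point in the kept dimensions u, the sampled values are summed over the grid in the integrated dimensions u', weighted by the product of the corresponding 1D weights. The toy model f and the grids below are made up for illustration and stand in for the stored code samples.

    import numpy as np
    from itertools import product

    def f(x):  # toy model standing in for the stored code samples
        return x[0] ** 2 + x[1]

    xi = [np.array([0.0, 0.5, 1.0]), np.array([0.0, 1.0])]
    wi = [np.array([0.25, 0.5, 0.25]), np.array([0.5, 0.5])]

    u, u_prime = [0], [1]
    xi_d_u = np.array(list(product(*[xi[k] for k in u])))
    xi_d_up = np.array(list(product(*[xi[k] for k in u_prime])))
    wi_d_up = np.array(list(product(*[wi[k] for k in u_prime])))

    # marginal h(x_u) = sum over the u' grid of f(x_u, x_u') * product of u' weights
    h = np.zeros(xi_d_u.shape[0])
    for i_u, x_u in enumerate(xi_d_u):
        for i_up, x_up in enumerate(xi_d_up):
            h[i_u] += f(np.concatenate((x_u, x_up))) * wi_d_up[i_up].prod()

    print(h)  # roughly [0.5, 0.75, 1.5]
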
def get_sobol_indices(self, qoi, typ='first_order'):
1222    def get_sobol_indices(self, qoi, typ='first_order'):
1223        """
1224        Computes Sobol indices using Stochastic Collocation. Method:
1225        Tang (2009), GLOBAL SENSITIVITY ANALYSIS  FOR STOCHASTIC COLLOCATION
1226        EXPANSION.
1227
1228        Parameters
1229        ----------
1230        qoi (str): name of the Quantity of Interest for which to compute the indices
1231        typ (str): Default = 'first_order'. 'all' is also possible
1232
1233        Returns
1234        -------
1235        Either the first order or all Sobol indices of qoi
1236        """
1237        logging.debug('Computing Sobol indices...')
1238        # multi indices
1239        U = np.arange(self.N)
1240
1241        if typ == 'first_order':
1242            P = list(powerset(U))[0:self.N + 1]
1243        elif typ == 'all':
1244            # all indices u
1245            P = list(powerset(U))
1246        else:
1247            logging.debug('Specified Sobol Index type %s not recognized' % typ)
1248            logging.debug('Accepted are first_order or all')
1249            import sys
1250            sys.exit()
1251
1252        # get first two moments
1253        mu, D = self.get_moments(qoi)
1254
1255        # partial variances
1256        D_u = {P[0]: mu**2}
1257
1258        sobol = {}
1259
1260        for u in P[1:]:
1261
1262            # complement of u
1263            u_prime = np.delete(U, u)
1264            D_u[u] = 0.0
1265
1266            for l in self.l_norm:
1267
1268                # expand the multi-index indices of the tensor product
1269                # (Q^1_{i1} - Q^1_{i1-1}) X ... X (Q^1_{id} - Q^1_{id-1})
1270                diff_idx = np.array(list(product(*[[k, -(k - 1)] for k in l])))
1271
1272                # perform analysis on each Q^1_l1 X ... X Q^1_l_N tensor prod
1273                for diff in diff_idx:
1274
1275                    # if any Q^1_li is below the minimum level, Q^1_li is defined
1276                    # as zero: do not compute this Q^1_l1 X ... X Q^1_l_N tensor prod
1277                    if not (np.abs(diff) < self.l_norm_min).any():
1278
1279                        # marginal integral h, integrate over dimensions u'
1280                        h, wi_d_u = self.compute_marginal(qoi, u, u_prime, diff)
1281
1282                        # square result and integrate over remaining dimensions u
1283                        for i_u in range(wi_d_u.shape[0]):
1284                            D_u[u] += np.sign(np.prod(diff)) * h[i_u]**2 * wi_d_u[i_u].prod()
1285
1286                # D_u[u] = D_u[u].flatten()
1287
1288            # all proper subsets of u
1289            W = list(powerset(u))[0:-1]
1290
1291            # partial variance of u
1292            for w in W:
1293                D_u[u] -= D_u[w]
1294
1295            # compute Sobol index, only include points where D > 0
1296            if 0 in D:
1297                with warnings.catch_warnings():
1298                    # ignore divide by zero warning
1299                    warnings.simplefilter("ignore")
1300                    sobol[u] = D_u[u] / D
1301            else:
1302                sobol[u] = D_u[u] / D
1303
1304        logging.debug('done.')
1305        return sobol

Computes Sobol indices using Stochastic Collocation. Method: Tang (2009), GLOBAL SENSITIVITY ANALYSIS FOR STOCHASTIC COLLOCATION EXPANSION.

Parameters
  • qoi (str): name of the Quantity of Interest for which to compute the indices
  • typ (str): Default = 'first_order'. 'all' is also possible
Returns
  • Either the first order or all Sobol indices of qoi
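
The subtraction over subsets in the loop above is an inclusion-exclusion step: the variance attributed to exactly the set u is the raw integral for u minus everything already attributed to its proper subsets (the empty set carries the squared mean). A tiny sketch with made-up raw integral values for a two-input problem:

    from itertools import chain, combinations

    def powerset(iterable):  # same recipe as the module-level powerset below
        s = list(iterable)
        return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))

    # hypothetical raw integrals of the squared marginals for a 2-input problem
    raw = {(): 4.0, (0,): 4.1, (1,): 4.3, (0, 1): 4.5}

    D_u = {(): raw[()]}  # the empty set holds mu**2
    for u in list(powerset(range(2)))[1:]:
        D_u[u] = raw[u]
        for w in list(powerset(u))[:-1]:  # subtract all proper subsets of u
            D_u[u] -= D_u[w]

    D = raw[(0, 1)] - raw[()]  # total variance
    sobol = {u: D_u[u] / D for u in D_u if u != ()}
    print(sobol)  # roughly {(0,): 0.2, (1,): 0.6, (0, 1): 0.2}
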
def get_uncertainty_amplification(self, qoi):
1307    def get_uncertainty_amplification(self, qoi):
1308        """
1309        Computes a measure that signifies the ratio of output to input
1310        uncertainty. It is computed as the (mean) Coefficient of Variation (CV)
1311        of the output divided by the (mean) CV of the input.
1312
1313        Parameters
1314        ----------
1315        qoi (string): name of the Quantity of Interest
1316
1317        Returns
1318        -------
1319        blowup (float): the ratio output CV / input CV
1320
1321        """
1322
1323        mean_f, var_f = self.get_moments(qoi)
1324        std_f = np.sqrt(var_f)
1325
1326        mean_xi = []
1327        std_xi = []
1328        CV_xi = []
1329        for param in self.sampler.params_distribution:
1330            E = cp.E(param)
1331            Std = cp.Std(param)
1332            mean_xi.append(E)
1333            std_xi.append(Std)
1334            CV_xi.append(Std / E)
1335
1336        CV_in = np.mean(CV_xi)
1337        CV_out = std_f / mean_f
1338        idx = np.where(np.isnan(CV_out) == False)[0]
1339        CV_out = np.mean(CV_out[idx])
1340        blowup = CV_out / CV_in
1341
1342        print('-----------------')
1343        print('Mean CV input = %.4f %%' % (100 * CV_in, ))
1344        print('Mean CV output = %.4f %%' % (100 * CV_out, ))
1345        print('Uncertainty amplification factor = %.4f/%.4f = %.4f' %
1346              (CV_out, CV_in, blowup))
1347        print('-----------------')
1348
1349        return blowup

Computes a measure that signifies the ratio of output to input uncertainty. It is computed as the (mean) Coefficient of Variation (CV) of the output divided by the (mean) CV of the input.

Parameters
  • qoi (string): name of the Quantity of Interest
Returns
  • blowup (float): the ratio output CV / input CV
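
A minimal sketch of the ratio itself, with made-up output moments standing in for get_moments and made-up input statistics standing in for the chaospy means and standard deviations:

    import numpy as np

    # hypothetical output moments, one value per point of a vector-valued QoI
    mean_f = np.array([2.0, 4.0])
    var_f = np.array([0.25, 1.0])

    # made-up means and standard deviations of two uncertain inputs
    mean_xi = np.array([1.0, 10.0])
    std_xi = np.array([0.1, 2.0])

    CV_in = np.mean(std_xi / mean_xi)            # mean input coefficient of variation
    CV_out = np.sqrt(var_f) / mean_f             # output CV per point of the QoI
    CV_out = np.mean(CV_out[~np.isnan(CV_out)])  # drop NaNs (where mean_f == 0), then average

    blowup = CV_out / CV_in
    print(blowup)  # roughly 1.67: output uncertainty ~1.7x the input uncertainty
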
def powerset(iterable):
1352def powerset(iterable):
1353    """powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
1354
1355    Taken from: https://docs.python.org/3/library/itertools.html#recipes
1356
1357    Parameters
1358    ----------
1359    iterable : iterable
1360        Input sequence
1361
1362    Returns
1363    -------
1364    iterator over all subsets (as tuples) of the input sequence
1365    """
1366
1367    s = list(iterable)
1368    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))

powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)

Taken from: https://docs.python.org/3/library/itertools.html#recipes

Parameters
  • iterable (iterable): Input sequence
Returns
  • iterator over all subsets (as tuples) of the input sequence
def lagrange_poly(x, x_i, j):
1371def lagrange_poly(x, x_i, j):
1372    """Lagrange polynomials used for interpolation
1373
1374    l_j(x) = product((x - x_m) / (x_j - x_m)) with 0 <= m <= k
1375                                                   and m != j
1376
1377    Parameters
1378    ----------
1379    x : float
1380        location at which to compute the polynomial
1381
1382    x_i : list or array of float
1383        nodes of the Lagrange polynomials
1384
1385    j : int
1386        index of node at which l_j(x_j) = 1
1387
1388    Returns
1389    -------
1390    float
1391        l_j(x) calculated as shown above.
1392    """
1393    l_j = 1.0
1394
1395    for m in range(len(x_i)):
1396
1397        if m != j:
1398            denom = x_i[j] - x_i[m]
1399            nom = x - x_i[m]
1400
1401            l_j *= nom / denom
1402
1403    return l_j
1404    # implementation below is more beautiful, but slower
1405    # x_i_ = np.delete(x_i, j)
1406    # return np.prod((x - x_i_) / (x_i[j] - x_i_))

Lagrange polynomials used for interpolation

l_j(x) = product((x - x_m) / (x_j - x_m)) with 0 <= m <= k and m != j

Parameters
  • x (float): location at which to compute the polynomial
  • x_i (list or array of float): nodes of the Lagrange polynomials
  • j (int): index of node at which l_j(x_j) = 1
Returns
  • float: l_j(x) calculated as shown above.
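
A quick standalone check of the basis property l_j(x_i) = 1 if i == j and 0 otherwise, importing lagrange_poly from this module; the Lagrange interpolation of a quadratic through three nodes is then exact.

    from easyvvuq.analysis.sc_analysis import lagrange_poly

    nodes = [0.0, 0.5, 1.0]

    # l_1 is 1 at its own node and 0 at the other nodes
    print([lagrange_poly(x, nodes, 1) for x in nodes])  # [0.0, 1.0, 0.0]

    # interpolating f(x) = x**2 through the three nodes reproduces it exactly
    f = [x ** 2 for x in nodes]
    approx = sum(f[j] * lagrange_poly(0.25, nodes, j) for j in range(len(nodes)))
    print(approx)  # 0.0625 == 0.25**2
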
def setdiff2d(X, Y):
1409def setdiff2d(X, Y):
1410    """
1411    Computes the difference of two 2D arrays X and Y
1412
1413    Parameters
1414    ----------
1415    X : 2D numpy array
1416    Y : 2D numpy array
1417
1418    Returns
1419    -------
1420    The difference X \\ Y as a 2D array
1421
1422    """
1423    diff = set(map(tuple, X)) - set(map(tuple, Y))
1424    return np.array(list(diff))

Computes the difference of two 2D arrays X and Y

Parameters
  • X (2D numpy array)
  • Y (2D numpy array)
Returns
  • The difference X \ Y as a 2D array
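
A short usage illustration with made-up index arrays; note that the row order of the result is arbitrary because the computation goes through a Python set.

    import numpy as np
    from easyvvuq.analysis.sc_analysis import setdiff2d

    X = np.array([[1, 1], [2, 1], [1, 2]])
    Y = np.array([[1, 1]])

    print(setdiff2d(X, Y))  # the rows [2, 1] and [1, 2], in arbitrary order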