easyvvuq.analysis.ssc_analysis

ANALYSIS CLASS FOR THE SSC SAMPLER

  1"""
  2ANALYSIS CLASS FOR THE SSC SAMPLER
  3"""
  4
  5import numpy as np
  6import pickle
  7import copy
  8from easyvvuq import OutputType
  9from .base import BaseAnalysisElement
 10from .results import AnalysisResults
 11# import logging
 12# from scipy.special import comb
 13import pandas as pd
 14import matplotlib.pyplot as plt
 15from matplotlib.patches import Polygon
 16
 17__author__ = "Wouter Edeling"
 18__copyright__ = """
 19
 20    Copyright 2018 Robin A. Richardson, David W. Wright
 21
 22    This file is part of EasyVVUQ
 23
 24    EasyVVUQ is free software: you can redistribute it and/or modify
 25    it under the terms of the Lesser GNU General Public License as published by
 26    the Free Software Foundation, either version 3 of the License, or
 27    (at your option) any later version.
 28
 29    EasyVVUQ is distributed in the hope that it will be useful,
 30    but WITHOUT ANY WARRANTY; without even the implied warranty of
 31    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 32    Lesser GNU General Public License for more details.
 33
 34    You should have received a copy of the Lesser GNU General Public License
 35    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 36
 37"""
 38__license__ = "LGPL"
 39
 40
 41class SSCAnalysisResults(AnalysisResults):
 42    # def _get_sobols_first(self, qoi, input_):
 43    #     raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first'])
 44    #     result = raw_dict[AnalysisResults._to_tuple(qoi)][input_]
 45    #     try:
 46    #         return np.array([float(result)])
 47    #     except TypeError:
 48    #         return np.array(result)
 49
 50    def supported_stats(self):
 51        """Types of statistics supported by the describe method.
 52
 53        Returns
 54        -------
 55        list of str
 56        """
 57        return ['mean', 'var', 'std']
 58
 59    def _describe(self, qoi, statistic):
 60        if statistic in self.supported_stats():
 61            return self.raw_data['statistical_moments'][qoi][statistic]
 62        else:
 63            raise NotImplementedError
 64
 65    def surrogate(self):
 66        """Return a SSC surrogate model.
 67
 68        Returns
 69        -------
 70        A function that takes a dictionary of parameter - value pairs and returns
 71        a dictionary with the results (same output as decoder).
 72        """
 73        def surrogate_fn(inputs):
 74            def swap(x):
 75                if x.size > 1:
 76                    return list(x)
 77                else:
 78                    return x[0]
 79            values = np.squeeze(np.array([inputs[key] for key in self.inputs])).T
 80            results = dict([(qoi, swap(self.surrogate_(qoi, values))) for qoi in self.qois])
 81            return results
 82        return surrogate_fn
 83
 84
 85class SSCAnalysis(BaseAnalysisElement):
 86    """
 87    SSc analysis class.
 88    """
 89
 90    def __init__(self, sampler=None, qoi_cols=None):
 91        """
 92        Parameters
 93        ----------
 94        sampler : SSCSampler
 95            Sampler used to initiate the SSC analysis
 96        qoi_cols : list or None
 97            Column names for quantities of interest (for which analysis is
 98            performed).
 99        """
100
101        if sampler is None:
102            msg = 'SSC analysis requires a paired sampler to be passed'
103            raise RuntimeError(msg)
104
105        if qoi_cols is None:
106            raise RuntimeError("Analysis element requires a list of "
107                               "quantities of interest (qoi)")
108
109        self.qoi_cols = qoi_cols
110        self.output_type = OutputType.SUMMARY
111        self.sampler = sampler
112
113    def element_name(self):
114        """Name for this element for logging purposes"""
115        return "SSC_Analysis"
116
117    def element_version(self):
118        """Version of this element for logging purposes"""
119        return "0.1"
120
121    def save_state(self, filename):
122        """
123        Saves the complete state of the analysis object to a pickle file,
124        except the sampler object (self.sampler).
125
126        Parameters
127        ----------
128        filename : string
129            name to the file to write the state to
130        """
131        print("Saving analysis state to %s" % filename)
132        # make a copy of the state, and do not store the sampler as well
133        state = copy.copy(self.__dict__)
134        del state['sampler']
135        with open(filename, 'wb') as fp:
136            pickle.dump(state, fp)
137
138    def load_state(self, filename):
139        """
140        Loads the complete state of the analysis object from a
141        pickle file, stored using save_state.
142
143        Parameters
144        ----------
145        filename : string
146            name of the file to load
147        """
148        print("Loading analysis state from %s" % filename)
149        with open(filename, 'rb') as fp:
150            state = pickle.load(fp)
151        for key in state.keys():
152            self.__dict__[key] = state[key]
153
154    def analyse(self, data_frame=None, compute_moments=True, n_mc=20000):
155        """
156        Perform SSC analysis on input `data_frame`.
157
158        Parameters
159        ----------
160        data_frame : pandas.DataFrame
161            Input data for analysis.
162        compute_moments : bool, optional.
163            Compute the first 2 moments. Default is True.
164
165        Returns
166        -------
167        results : dict
168            A dictionary containing the statistical moments.
169        """
170
171        if data_frame is None:
172            raise RuntimeError("Analysis element needs a data frame to "
173                               "analyse")
174        elif isinstance(data_frame, pd.DataFrame) and data_frame.empty:
175            raise RuntimeError(
176                "No data in data frame passed to analyse element")
177
178        self.load_samples(data_frame)
179        # # size of one code sample
180        # # TODO: change this to include QoI of different size
181        # self.N_qoi = self.samples[qoi_cols[0]][0].size
182
183        # Compute descriptive statistics for each quantity of interest
184        results = {'statistical_moments': {}}
185
186        if compute_moments:
187            for qoi_k in self.qoi_cols:
188                mean_k, var_k = self.get_moments(qoi_k, n_mc=n_mc)
189                std_k = var_k ** 0.5
190                # compute statistical moments
191                results['statistical_moments'][qoi_k] = {'mean': mean_k,
192                                                         'var': var_k,
193                                                         'std': std_k}
194
195        results = SSCAnalysisResults(raw_data=results, samples=data_frame,
196                                     qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))
197        results.surrogate_ = self.surrogate
198        return results
199
200    def load_samples(self, data_frame):
201        """
202        Extract output values for each quantity of interest from Dataframe.
203
204        Parameters
205        ----------
206        data_frame : EasyVVUQ (pandas) data frame
207            The code samples from the EasyVVUQ data frame.
208
209        Returns
210        -------
211        None.
212
213        """
214        print('Loading samples...')
215        qoi_cols = self.qoi_cols
216        samples = {k: [] for k in qoi_cols}
217        for k in qoi_cols:
218            for run_id in data_frame[('run_id', 0)].unique():
219                values = data_frame.loc[data_frame[('run_id', 0)] == run_id][k].values
220                samples[k].append(values.flatten())
221            samples[k] = np.array(samples[k])
222        self.samples = samples
223        print('done')
224
225    def get_moments(self, qoi, n_mc):
226        """
227        Compute the mean and variance through Monte Carlo sampling of the SSC
228        surrogate. Independent random inputs samples are drawn though the
229        SSC sampler object.
230
231        Parameters
232        ----------
233        qoi : string
234            The name of the QoI.
235        n_mc : int
236            The number of Monte Carlo samples.
237
238        Returns
239        -------
240        mean : array
241            The mean of qoi.
242        var : array
243            The variance of qoi.
244
245        """
246        print('Computing mean and variance...')
247        Xi = self.sampler.sample_inputs(n_mc)
248        rvs = [self.surrogate(qoi, xi) for xi in Xi]
249        mean = np.mean(rvs)
250        var = np.var(rvs)
251        print('done.')
252        return np.array([mean]), np.array([var])
253
254    def update_surrogate(self, qoi, data_frame, max_LEC_jobs=4, n_mc_LEC=5,
255                         max_ENO_jobs=4):
256        """
257        Update the SSC surrogate given new data. Given an EasyVVUQ dataframe,
258        check the LEC condition, and compute the ENO interpolation stencils.
259
260        Parameters
261        ----------
262        qoi : string
263            The name of the QoI on the basis of which the sampling plan
264            is refined.
265        data_frame : EasyVVUQ (pandas) data frame
266            The code samples from the EasyVVUQ data frame.
267        max_LEC_jobs : int, optional
268            The number of LEC checks to perform in parallel. The default is 4.
269        n_mc_LEC : int, optional
270            The number of surrogate evaluations used in the LEC check.
271            The default is 5.
272        max_LEC_jobs : int, optional
273            The number of ENO stencils to compute in parallel. The default is 4.
274
275        Returns
276        -------
277        None. Stores the polynomials orders, interpolation stencils and
278        the simplex probabilities in analysis.p_j, analysis.S_j and
279        analysis.prob_j respectively.
280
281        """
282
283        # the number of code evaluations
284        n_s = self.sampler.n_samples
285        # the number of simplex elements
286        n_e = self.sampler.n_elements
287
288        # load the EasyVVUQ data frame
289        self.load_samples(data_frame)
290        # code outputs
291        # v = np.array(self.samples[qoi])
292        v = self.samples[qoi]
293
294        # find the max polynomial order and set the p_j = pmax
295        pmax = self.sampler.find_pmax(n_s)
296        if pmax > self.sampler.pmax_cutoff:
297            pmax = self.sampler.pmax_cutoff
298            print('Max. polynomial order set by hand to = ' + str(pmax))
299        else:
300            print('Max. polynomial order allowed by n_s = ' + str(pmax))
301
302        # polynomial order per simplex elememt
303        p_j = (np.ones(n_e) * pmax).astype('int')
304
305        # compute nearest neighbour stencils
306        S_j = self.sampler.compute_stencil_j()
307
308        # check the LEC condition of all stencil
309        res_LEC = self.sampler.check_LEC(p_j, v, S_j,
310                                         n_mc=n_mc_LEC,
311                                         max_jobs=max_LEC_jobs)
312        # updated polynomial order, stencil and el_idx are the element indices
313        # per interpolation stencil
314        p_j = res_LEC['p_j']
315        S_j = res_LEC['S_j']
316        el_idx = res_LEC['el_idx']
317
318        # convert the nearest-neighbour stencils to ENO stencils
319        S_j, p_j, el_idx = self.sampler.compute_ENO_stencil(p_j, S_j, el_idx,
320                                                            max_jobs=max_ENO_jobs)
321        # store polynomial orders and stencils
322        self.p_j = p_j
323        self.S_j = S_j
324
325        print("Updated polynomials orders = %s" % p_j)
326
327        # compute the simplex probabilities
328        self.prob_j = self.sampler.compute_probability()
329
330    def adapt_locally(self, n_new_samples=1):
331        """
332        Locally refine the sampling plan based on the SSC geometric
333        refinement measure.
334
335        Parameters
336        ----------
337        n_new_samples : int, optional
338            The number of new code evaulations to perform. The default is 1.
339
340        Returns
341        -------
342        None. Updates the Delaunay triangulation of the SSC sampler with
343        the new points. A new ensemble must be executed next.
344
345        """
346
347        if n_new_samples > self.sampler.n_elements:
348            n_new_samples = self.sampler.n_elements
349
350        # compute the refinement measures
351        eps_bar_j, vol_j = self.sampler.compute_eps_bar_j(self.p_j, self.prob_j)
352
353        # rank elements according to eps_bar_j
354        refine_idx = np.flipud(np.argsort(eps_bar_j))
355
356        # find the elements at the hypercube boundaries
357        bound_simplices = self.sampler.find_boundary_simplices()
358
359        # store which edges are refined, to prevent refining the same edge twice
360        # during the same interation
361        refined_edges = []
362
363        # the refinement locations
364        xi_k_jref = np.zeros([n_new_samples, self.sampler.n_dimensions])
365
366        i = 0
367        j = 0
368
369        # determine n_new_samples locations
370        while i < n_new_samples:
371
372            if np.in1d(refine_idx[j], bound_simplices) == False:
373                # compute the subvertices of the element chosen for refinement
374                sub_vertices = self.sampler.compute_sub_simplex_vertices(refine_idx[j])
375                # draw a random sample from the sub simplex
376                xi_k_jref[i, :] = self.sampler.sample_simplex(1, sub_vertices)
377                # if interior is refined: no danger of refining the same edge twice
378                already_refined = False
379            else:
380                print('refining edge')
381
382                xi_k_jref[i, :], refined_edges, already_refined =  \
383                    self.sampler.sample_simplex_edge(refine_idx[j], refined_edges)
384
385            if not already_refined:
386                i += 1
387                j += 1
388            else:
389                print('Edge already refined, selecting another sample.')
390                j += 1
391
392        self.sampler.update_Delaunay(xi_k_jref)
393
394    def surrogate(self, qoi, xi):
395        """
396        Evaluate the SSC surrogate at xi.
397
398        Parameters
399        ----------
400        qoi : string
401            Name of the QoI.
402        xi : array, shape (n_xi,)
403            The location in the input space at which to evaluate the
404            surrogate.
405
406        Returns
407        -------
408        array
409            The surrogate output at xi
410
411        """
412
413        if not isinstance(xi, np.ndarray):
414            xi = np.array([xi])
415
416        surr = self.sampler.surrogate(xi, self.S_j, self.p_j, self.samples[qoi])
417        return np.array([surr])
418
419    def get_sample_array(self, qoi):
420        """
421        Parameters
422        ----------
423        qoi : str
424            name of quantity of interest
425
426        Returns
427        -------
428        array of all samples of qoi
429        """
430
431        return self.samples[qoi]
432
433    def plot_grid(self):
434        """
435        Plot the 1D or 2D sampling plan and color code the simplices according
436        to their polynomial order.
437
438        Returns
439        -------
440        None.
441
442        """
443
444        assert self.sampler.n_xi == 1 or self.sampler.n_xi == 2, \
445            "Only works for 1D and 2D problems"
446
447        tri = self.sampler.tri
448        colors = ["#8dd3c7", "#ffffb3", "#bebada", "#fb8072", "#80b1d3",
449                  "#fdb462", "#b3de69", "#fccde5", "#d9d9d9"]
450
451        if self.sampler.n_xi == 2:
452            # plot the delaunay grid and color it according to the local p_j
453            fig = plt.figure()
454            ax = fig.add_subplot(111, xlabel=r'$\xi_1$', ylabel=r'$\xi_2$',
455                                 xlim=[self.sampler.corners[0][0],
456                                       self.sampler.corners[0][1]],
457                                 ylim=[self.sampler.corners[1][0],
458                                       self.sampler.corners[1][1]])
459
460            for p in range(np.max(self.p_j)):
461                idx = (self.p_j == p + 1).nonzero()[0]
462                first = True
463                for i in idx:
464                    vertices = tri.points[tri.simplices[i]]
465                    if first:
466                        pg = Polygon(vertices, facecolor=colors[p], edgecolor='k',
467                                     label=r'$p_j = %d$' % (p + 1))
468                        first = False
469                    else:
470                        pg = Polygon(vertices, facecolor=colors[p], edgecolor='k')
471
472                    ax.add_patch(pg)
473
474            leg = plt.legend(loc=0)
475            leg.set_draggable(True)
476
477        elif self.sampler.n_xi == 1:
478            fig = plt.figure()
479            ax = fig.add_subplot(111, xlabel='cell center',
480                                 ylabel=r'polynomial order %s' % '$p_j$')
481            ax.plot(self.sampler.compute_xi_center_j(), self.p_j, 'o')
482            ax.vlines(self.sampler.compute_xi_center_j(), np.zeros(self.sampler.n_elements),
483                      self.p_j, linestyles='dashed')
484            ax.vlines(self.sampler.tri.points, -0.05 * np.max(self.p_j),
485                      0.05 * np.max(self.p_j))
486
487        plt.tight_layout()
488        plt.show()
class SSCAnalysisResults(easyvvuq.analysis.results.AnalysisResults):
42class SSCAnalysisResults(AnalysisResults):
43    # def _get_sobols_first(self, qoi, input_):
44    #     raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first'])
45    #     result = raw_dict[AnalysisResults._to_tuple(qoi)][input_]
46    #     try:
47    #         return np.array([float(result)])
48    #     except TypeError:
49    #         return np.array(result)
50
51    def supported_stats(self):
52        """Types of statistics supported by the describe method.
53
54        Returns
55        -------
56        list of str
57        """
58        return ['mean', 'var', 'std']
59
60    def _describe(self, qoi, statistic):
61        if statistic in self.supported_stats():
62            return self.raw_data['statistical_moments'][qoi][statistic]
63        else:
64            raise NotImplementedError
65
66    def surrogate(self):
67        """Return a SSC surrogate model.
68
69        Returns
70        -------
71        A function that takes a dictionary of parameter - value pairs and returns
72        a dictionary with the results (same output as decoder).
73        """
74        def surrogate_fn(inputs):
75            def swap(x):
76                if x.size > 1:
77                    return list(x)
78                else:
79                    return x[0]
80            values = np.squeeze(np.array([inputs[key] for key in self.inputs])).T
81            results = dict([(qoi, swap(self.surrogate_(qoi, values))) for qoi in self.qois])
82            return results
83        return surrogate_fn

Contains the analysis results.

Parameters
  • raw_data (obj): An arbitrary object that contains raw analysis data.
  • samples (pandas DataFrame): Collated samples.
  • qois (list of str): List of qoi names used during the analysis.
  • inputs (list of str): List of input names used during the analysis.
def supported_stats(self):
51    def supported_stats(self):
52        """Types of statistics supported by the describe method.
53
54        Returns
55        -------
56        list of str
57        """
58        return ['mean', 'var', 'std']

Types of statistics supported by the describe method.

Returns
  • list of str
def surrogate(self):
66    def surrogate(self):
67        """Return a SSC surrogate model.
68
69        Returns
70        -------
71        A function that takes a dictionary of parameter - value pairs and returns
72        a dictionary with the results (same output as decoder).
73        """
74        def surrogate_fn(inputs):
75            def swap(x):
76                if x.size > 1:
77                    return list(x)
78                else:
79                    return x[0]
80            values = np.squeeze(np.array([inputs[key] for key in self.inputs])).T
81            results = dict([(qoi, swap(self.surrogate_(qoi, values))) for qoi in self.qois])
82            return results
83        return surrogate_fn

Return a SSC surrogate model.

Returns
  • A function that takes a dictionary of parameter–value pairs and returns a dictionary with the results (same output as decoder).
class SSCAnalysis(easyvvuq.analysis.base.BaseAnalysisElement):
 86class SSCAnalysis(BaseAnalysisElement):
 87    """
 88    SSC analysis class.
 89    """
 90
 91    def __init__(self, sampler=None, qoi_cols=None):
 92        """
 93        Parameters
 94        ----------
 95        sampler : SSCSampler
 96            Sampler used to initiate the SSC analysis
 97        qoi_cols : list or None
 98            Column names for quantities of interest (for which analysis is
 99            performed).
100        """
101
102        if sampler is None:
103            msg = 'SSC analysis requires a paired sampler to be passed'
104            raise RuntimeError(msg)
105
106        if qoi_cols is None:
107            raise RuntimeError("Analysis element requires a list of "
108                               "quantities of interest (qoi)")
109
110        self.qoi_cols = qoi_cols
111        self.output_type = OutputType.SUMMARY
112        self.sampler = sampler
113
114    def element_name(self):
115        """Name for this element for logging purposes"""
116        return "SSC_Analysis"
117
118    def element_version(self):
119        """Version of this element for logging purposes"""
120        return "0.1"
121
122    def save_state(self, filename):
123        """
124        Saves the complete state of the analysis object to a pickle file,
125        except the sampler object (self.sampler).
126
127        Parameters
128        ----------
129        filename : string
130            name to the file to write the state to
131        """
132        print("Saving analysis state to %s" % filename)
133        # make a copy of the state, and do not store the sampler as well
134        state = copy.copy(self.__dict__)
135        del state['sampler']
136        with open(filename, 'wb') as fp:
137            pickle.dump(state, fp)
138
139    def load_state(self, filename):
140        """
141        Loads the complete state of the analysis object from a
142        pickle file, stored using save_state.
143
144        Parameters
145        ----------
146        filename : string
147            name of the file to load
148        """
149        print("Loading analysis state from %s" % filename)
150        with open(filename, 'rb') as fp:
151            state = pickle.load(fp)
152        for key in state.keys():
153            self.__dict__[key] = state[key]
154
155    def analyse(self, data_frame=None, compute_moments=True, n_mc=20000):
156        """
157        Perform SSC analysis on input `data_frame`.
158
159        Parameters
160        ----------
161        data_frame : pandas.DataFrame
162            Input data for analysis.
163        compute_moments : bool, optional.
164            Compute the first 2 moments. Default is True.
165
166        Returns
167        -------
168        results : dict
169            A dictionary containing the statistical moments.
170        """
171
172        if data_frame is None:
173            raise RuntimeError("Analysis element needs a data frame to "
174                               "analyse")
175        elif isinstance(data_frame, pd.DataFrame) and data_frame.empty:
176            raise RuntimeError(
177                "No data in data frame passed to analyse element")
178
179        self.load_samples(data_frame)
180        # # size of one code sample
181        # # TODO: change this to include QoI of different size
182        # self.N_qoi = self.samples[qoi_cols[0]][0].size
183
184        # Compute descriptive statistics for each quantity of interest
185        results = {'statistical_moments': {}}
186
187        if compute_moments:
188            for qoi_k in self.qoi_cols:
189                mean_k, var_k = self.get_moments(qoi_k, n_mc=n_mc)
190                std_k = var_k ** 0.5
191                # compute statistical moments
192                results['statistical_moments'][qoi_k] = {'mean': mean_k,
193                                                         'var': var_k,
194                                                         'std': std_k}
195
196        results = SSCAnalysisResults(raw_data=results, samples=data_frame,
197                                     qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))
198        results.surrogate_ = self.surrogate
199        return results
200
201    def load_samples(self, data_frame):
202        """
203        Extract output values for each quantity of interest from Dataframe.
204
205        Parameters
206        ----------
207        data_frame : EasyVVUQ (pandas) data frame
208            The code samples from the EasyVVUQ data frame.
209
210        Returns
211        -------
212        None.
213
214        """
215        print('Loading samples...')
216        qoi_cols = self.qoi_cols
217        samples = {k: [] for k in qoi_cols}
218        for k in qoi_cols:
219            for run_id in data_frame[('run_id', 0)].unique():
220                values = data_frame.loc[data_frame[('run_id', 0)] == run_id][k].values
221                samples[k].append(values.flatten())
222            samples[k] = np.array(samples[k])
223        self.samples = samples
224        print('done')
225
226    def get_moments(self, qoi, n_mc):
227        """
228        Compute the mean and variance through Monte Carlo sampling of the SSC
229        surrogate. Independent random inputs samples are drawn though the
230        SSC sampler object.
231
232        Parameters
233        ----------
234        qoi : string
235            The name of the QoI.
236        n_mc : int
237            The number of Monte Carlo samples.
238
239        Returns
240        -------
241        mean : array
242            The mean of qoi.
243        var : array
244            The variance of qoi.
245
246        """
247        print('Computing mean and variance...')
248        Xi = self.sampler.sample_inputs(n_mc)
249        rvs = [self.surrogate(qoi, xi) for xi in Xi]
250        mean = np.mean(rvs)
251        var = np.var(rvs)
252        print('done.')
253        return np.array([mean]), np.array([var])
254
255    def update_surrogate(self, qoi, data_frame, max_LEC_jobs=4, n_mc_LEC=5,
256                         max_ENO_jobs=4):
257        """
258        Update the SSC surrogate given new data. Given an EasyVVUQ dataframe,
259        check the LEC condition, and compute the ENO interpolation stencils.
260
261        Parameters
262        ----------
263        qoi : string
264            The name of the QoI on the basis of which the sampling plan
265            is refined.
266        data_frame : EasyVVUQ (pandas) data frame
267            The code samples from the EasyVVUQ data frame.
268        max_LEC_jobs : int, optional
269            The number of LEC checks to perform in parallel. The default is 4.
270        n_mc_LEC : int, optional
271            The number of surrogate evaluations used in the LEC check.
272            The default is 5.
273        max_LEC_jobs : int, optional
274            The number of ENO stencils to compute in parallel. The default is 4.
275
276        Returns
277        -------
278        None. Stores the polynomials orders, interpolation stencils and
279        the simplex probabilities in analysis.p_j, analysis.S_j and
280        analysis.prob_j respectively.
281
282        """
283
284        # the number of code evaluations
285        n_s = self.sampler.n_samples
286        # the number of simplex elements
287        n_e = self.sampler.n_elements
288
289        # load the EasyVVUQ data frame
290        self.load_samples(data_frame)
291        # code outputs
292        # v = np.array(self.samples[qoi])
293        v = self.samples[qoi]
294
295        # find the max polynomial order and set the p_j = pmax
296        pmax = self.sampler.find_pmax(n_s)
297        if pmax > self.sampler.pmax_cutoff:
298            pmax = self.sampler.pmax_cutoff
299            print('Max. polynomial order set by hand to = ' + str(pmax))
300        else:
301            print('Max. polynomial order allowed by n_s = ' + str(pmax))
302
303        # polynomial order per simplex elememt
304        p_j = (np.ones(n_e) * pmax).astype('int')
305
306        # compute nearest neighbour stencils
307        S_j = self.sampler.compute_stencil_j()
308
309        # check the LEC condition of all stencil
310        res_LEC = self.sampler.check_LEC(p_j, v, S_j,
311                                         n_mc=n_mc_LEC,
312                                         max_jobs=max_LEC_jobs)
313        # updated polynomial order, stencil and el_idx are the element indices
314        # per interpolation stencil
315        p_j = res_LEC['p_j']
316        S_j = res_LEC['S_j']
317        el_idx = res_LEC['el_idx']
318
319        # convert the nearest-neighbour stencils to ENO stencils
320        S_j, p_j, el_idx = self.sampler.compute_ENO_stencil(p_j, S_j, el_idx,
321                                                            max_jobs=max_ENO_jobs)
322        # store polynomial orders and stencils
323        self.p_j = p_j
324        self.S_j = S_j
325
326        print("Updated polynomials orders = %s" % p_j)
327
328        # compute the simplex probabilities
329        self.prob_j = self.sampler.compute_probability()
330
331    def adapt_locally(self, n_new_samples=1):
332        """
333        Locally refine the sampling plan based on the SSC geometric
334        refinement measure.
335
336        Parameters
337        ----------
338        n_new_samples : int, optional
339            The number of new code evaulations to perform. The default is 1.
340
341        Returns
342        -------
343        None. Updates the Delaunay triangulation of the SSC sampler with
344        the new points. A new ensemble must be executed next.
345
346        """
347
348        if n_new_samples > self.sampler.n_elements:
349            n_new_samples = self.sampler.n_elements
350
351        # compute the refinement measures
352        eps_bar_j, vol_j = self.sampler.compute_eps_bar_j(self.p_j, self.prob_j)
353
354        # rank elements according to eps_bar_j
355        refine_idx = np.flipud(np.argsort(eps_bar_j))
356
357        # find the elements at the hypercube boundaries
358        bound_simplices = self.sampler.find_boundary_simplices()
359
360        # store which edges are refined, to prevent refining the same edge twice
361        # during the same interation
362        refined_edges = []
363
364        # the refinement locations
365        xi_k_jref = np.zeros([n_new_samples, self.sampler.n_dimensions])
366
367        i = 0
368        j = 0
369
370        # determine n_new_samples locations
371        while i < n_new_samples:
372
373            if np.in1d(refine_idx[j], bound_simplices) == False:
374                # compute the subvertices of the element chosen for refinement
375                sub_vertices = self.sampler.compute_sub_simplex_vertices(refine_idx[j])
376                # draw a random sample from the sub simplex
377                xi_k_jref[i, :] = self.sampler.sample_simplex(1, sub_vertices)
378                # if interior is refined: no danger of refining the same edge twice
379                already_refined = False
380            else:
381                print('refining edge')
382
383                xi_k_jref[i, :], refined_edges, already_refined =  \
384                    self.sampler.sample_simplex_edge(refine_idx[j], refined_edges)
385
386            if not already_refined:
387                i += 1
388                j += 1
389            else:
390                print('Edge already refined, selecting another sample.')
391                j += 1
392
393        self.sampler.update_Delaunay(xi_k_jref)
394
395    def surrogate(self, qoi, xi):
396        """
397        Evaluate the SSC surrogate at xi.
398
399        Parameters
400        ----------
401        qoi : string
402            Name of the QoI.
403        xi : array, shape (n_xi,)
404            The location in the input space at which to evaluate the
405            surrogate.
406
407        Returns
408        -------
409        array
410            The surrogate output at xi
411
412        """
413
414        if not isinstance(xi, np.ndarray):
415            xi = np.array([xi])
416
417        surr = self.sampler.surrogate(xi, self.S_j, self.p_j, self.samples[qoi])
418        return np.array([surr])
419
420    def get_sample_array(self, qoi):
421        """
422        Parameters
423        ----------
424        qoi : str
425            name of quantity of interest
426
427        Returns
428        -------
429        array of all samples of qoi
430        """
431
432        return self.samples[qoi]
433
434    def plot_grid(self):
435        """
436        Plot the 1D or 2D sampling plan and color code the simplices according
437        to their polynomial order.
438
439        Returns
440        -------
441        None.
442
443        """
444
445        assert self.sampler.n_xi == 1 or self.sampler.n_xi == 2, \
446            "Only works for 1D and 2D problems"
447
448        tri = self.sampler.tri
449        colors = ["#8dd3c7", "#ffffb3", "#bebada", "#fb8072", "#80b1d3",
450                  "#fdb462", "#b3de69", "#fccde5", "#d9d9d9"]
451
452        if self.sampler.n_xi == 2:
453            # plot the delaunay grid and color it according to the local p_j
454            fig = plt.figure()
455            ax = fig.add_subplot(111, xlabel=r'$\xi_1$', ylabel=r'$\xi_2$',
456                                 xlim=[self.sampler.corners[0][0],
457                                       self.sampler.corners[0][1]],
458                                 ylim=[self.sampler.corners[1][0],
459                                       self.sampler.corners[1][1]])
460
461            for p in range(np.max(self.p_j)):
462                idx = (self.p_j == p + 1).nonzero()[0]
463                first = True
464                for i in idx:
465                    vertices = tri.points[tri.simplices[i]]
466                    if first:
467                        pg = Polygon(vertices, facecolor=colors[p], edgecolor='k',
468                                     label=r'$p_j = %d$' % (p + 1))
469                        first = False
470                    else:
471                        pg = Polygon(vertices, facecolor=colors[p], edgecolor='k')
472
473                    ax.add_patch(pg)
474
475            leg = plt.legend(loc=0)
476            leg.set_draggable(True)
477
478        elif self.sampler.n_xi == 1:
479            fig = plt.figure()
480            ax = fig.add_subplot(111, xlabel='cell center',
481                                 ylabel=r'polynomial order %s' % '$p_j$')
482            ax.plot(self.sampler.compute_xi_center_j(), self.p_j, 'o')
483            ax.vlines(self.sampler.compute_xi_center_j(), np.zeros(self.sampler.n_elements),
484                      self.p_j, linestyles='dashed')
485            ax.vlines(self.sampler.tri.points, -0.05 * np.max(self.p_j),
486                      0.05 * np.max(self.p_j))
487
488        plt.tight_layout()
489        plt.show()

SSC analysis class.

SSCAnalysis(sampler=None, qoi_cols=None)
 91    def __init__(self, sampler=None, qoi_cols=None):
 92        """
 93        Parameters
 94        ----------
 95        sampler : SSCSampler
 96            Sampler used to initiate the SSC analysis
 97        qoi_cols : list or None
 98            Column names for quantities of interest (for which analysis is
 99            performed).
100        """
101
102        if sampler is None:
103            msg = 'SSC analysis requires a paired sampler to be passed'
104            raise RuntimeError(msg)
105
106        if qoi_cols is None:
107            raise RuntimeError("Analysis element requires a list of "
108                               "quantities of interest (qoi)")
109
110        self.qoi_cols = qoi_cols
111        self.output_type = OutputType.SUMMARY
112        self.sampler = sampler
Parameters
  • sampler (SSCSampler): Sampler used to initiate the SSC analysis
  • qoi_cols (list or None): Column names for quantities of interest (for which analysis is performed).
qoi_cols
output_type
sampler
def element_name(self):
114    def element_name(self):
115        """Name for this element for logging purposes"""
116        return "SSC_Analysis"

Name for this element for logging purposes

def element_version(self):
118    def element_version(self):
119        """Version of this element for logging purposes"""
120        return "0.1"

Version of this element for logging purposes

def save_state(self, filename):
122    def save_state(self, filename):
123        """
124        Saves the complete state of the analysis object to a pickle file,
125        except the sampler object (self.sampler).
126
127        Parameters
128        ----------
129        filename : string
130            name to the file to write the state to
131        """
132        print("Saving analysis state to %s" % filename)
133        # make a copy of the state, and do not store the sampler as well
134        state = copy.copy(self.__dict__)
135        del state['sampler']
136        with open(filename, 'wb') as fp:
137            pickle.dump(state, fp)

Saves the complete state of the analysis object to a pickle file, except the sampler object (self.sampler).

Parameters
  • filename (string): name of the file to write the state to
def load_state(self, filename):
139    def load_state(self, filename):
140        """
141        Loads the complete state of the analysis object from a
142        pickle file, stored using save_state.
143
144        Parameters
145        ----------
146        filename : string
147            name of the file to load
148        """
149        print("Loading analysis state from %s" % filename)
150        with open(filename, 'rb') as fp:
151            state = pickle.load(fp)
152        for key in state.keys():
153            self.__dict__[key] = state[key]

Loads the complete state of the analysis object from a pickle file, stored using save_state.

Parameters
  • filename (string): name of the file to load
def analyse(self, data_frame=None, compute_moments=True, n_mc=20000):
    def analyse(self, data_frame=None, compute_moments=True, n_mc=20000):
        """
        Perform SSC analysis on input `data_frame`.

        Parameters
        ----------
        data_frame : pandas.DataFrame
            Input data for analysis.
        compute_moments : bool, optional.
            Compute the first 2 moments. Default is True.
        n_mc : int, optional
            Number of Monte Carlo samples of the SSC surrogate used to
            estimate the moments. Default is 20000.

        Returns
        -------
        results : SSCAnalysisResults
            Results object containing the statistical moments, with the
            SSC surrogate attached as its `surrogate_` attribute.
        """

        if data_frame is None:
            raise RuntimeError("Analysis element needs a data frame to "
                               "analyse")
        elif isinstance(data_frame, pd.DataFrame) and data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        # extract the code samples per QoI from the EasyVVUQ data frame
        self.load_samples(data_frame)
        # # size of one code sample
        # # TODO: change this to include QoI of different size
        # self.N_qoi = self.samples[qoi_cols[0]][0].size

        # Compute descriptive statistics for each quantity of interest
        results = {'statistical_moments': {}}

        if compute_moments:
            for qoi_k in self.qoi_cols:
                # mean and variance via Monte Carlo sampling of the surrogate
                mean_k, var_k = self.get_moments(qoi_k, n_mc=n_mc)
                std_k = var_k ** 0.5
                # compute statistical moments
                results['statistical_moments'][qoi_k] = {'mean': mean_k,
                                                         'var': var_k,
                                                         'std': std_k}

        results = SSCAnalysisResults(raw_data=results, samples=data_frame,
                                     qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))
        results.surrogate_ = self.surrogate
        return results

Perform SSC analysis on input data_frame.

Parameters
  • data_frame (pandas.DataFrame): Input data for analysis.
  • compute_moments (bool, optional.): Compute the first 2 moments. Default is True.
Returns
  • results (dict): A dictionary containing the statistical moments.
def load_samples(self, data_frame):
201    def load_samples(self, data_frame):
202        """
203        Extract output values for each quantity of interest from Dataframe.
204
205        Parameters
206        ----------
207        data_frame : EasyVVUQ (pandas) data frame
208            The code samples from the EasyVVUQ data frame.
209
210        Returns
211        -------
212        None.
213
214        """
215        print('Loading samples...')
216        qoi_cols = self.qoi_cols
217        samples = {k: [] for k in qoi_cols}
218        for k in qoi_cols:
219            for run_id in data_frame[('run_id', 0)].unique():
220                values = data_frame.loc[data_frame[('run_id', 0)] == run_id][k].values
221                samples[k].append(values.flatten())
222            samples[k] = np.array(samples[k])
223        self.samples = samples
224        print('done')

Extract output values for each quantity of interest from Dataframe.

Parameters
  • data_frame (EasyVVUQ (pandas) data frame): The code samples from the EasyVVUQ data frame.
Returns
  • None.
def get_moments(self, qoi, n_mc):
226    def get_moments(self, qoi, n_mc):
227        """
228        Compute the mean and variance through Monte Carlo sampling of the SSC
229        surrogate. Independent random inputs samples are drawn though the
230        SSC sampler object.
231
232        Parameters
233        ----------
234        qoi : string
235            The name of the QoI.
236        n_mc : int
237            The number of Monte Carlo samples.
238
239        Returns
240        -------
241        mean : array
242            The mean of qoi.
243        var : array
244            The variance of qoi.
245
246        """
247        print('Computing mean and variance...')
248        Xi = self.sampler.sample_inputs(n_mc)
249        rvs = [self.surrogate(qoi, xi) for xi in Xi]
250        mean = np.mean(rvs)
251        var = np.var(rvs)
252        print('done.')
253        return np.array([mean]), np.array([var])

Compute the mean and variance through Monte Carlo sampling of the SSC surrogate. Independent random input samples are drawn through the SSC sampler object.

Parameters
  • qoi (string): The name of the QoI.
  • n_mc (int): The number of Monte Carlo samples.
Returns
  • mean (array): The mean of qoi.
  • var (array): The variance of qoi.
def update_surrogate(self, qoi, data_frame, max_LEC_jobs=4, n_mc_LEC=5, max_ENO_jobs=4):
    def update_surrogate(self, qoi, data_frame, max_LEC_jobs=4, n_mc_LEC=5,
                         max_ENO_jobs=4):
        """
        Update the SSC surrogate given new data. Given an EasyVVUQ dataframe,
        check the LEC condition, and compute the ENO interpolation stencils.

        Parameters
        ----------
        qoi : string
            The name of the QoI on the basis of which the sampling plan
            is refined.
        data_frame : EasyVVUQ (pandas) data frame
            The code samples from the EasyVVUQ data frame.
        max_LEC_jobs : int, optional
            The number of LEC checks to perform in parallel. The default is 4.
        n_mc_LEC : int, optional
            The number of surrogate evaluations used in the LEC check.
            The default is 5.
        max_ENO_jobs : int, optional
            The number of ENO stencils to compute in parallel. The default is 4.

        Returns
        -------
        None. Stores the polynomial orders, interpolation stencils and
        the simplex probabilities in analysis.p_j, analysis.S_j and
        analysis.prob_j respectively.

        """

        # the number of code evaluations
        n_s = self.sampler.n_samples
        # the number of simplex elements
        n_e = self.sampler.n_elements

        # load the EasyVVUQ data frame
        self.load_samples(data_frame)
        # code outputs
        # v = np.array(self.samples[qoi])
        v = self.samples[qoi]

        # find the max polynomial order and set the p_j = pmax
        pmax = self.sampler.find_pmax(n_s)
        if pmax > self.sampler.pmax_cutoff:
            # respect the user-specified cap on the polynomial order
            pmax = self.sampler.pmax_cutoff
            print('Max. polynomial order set by hand to = ' + str(pmax))
        else:
            print('Max. polynomial order allowed by n_s = ' + str(pmax))

        # polynomial order per simplex element, initialised to pmax
        p_j = (np.ones(n_e) * pmax).astype('int')

        # compute nearest neighbour stencils
        S_j = self.sampler.compute_stencil_j()

        # check the LEC condition of all stencils
        res_LEC = self.sampler.check_LEC(p_j, v, S_j,
                                         n_mc=n_mc_LEC,
                                         max_jobs=max_LEC_jobs)
        # updated polynomial order, stencil and el_idx are the element indices
        # per interpolation stencil
        p_j = res_LEC['p_j']
        S_j = res_LEC['S_j']
        el_idx = res_LEC['el_idx']

        # convert the nearest-neighbour stencils to ENO stencils
        S_j, p_j, el_idx = self.sampler.compute_ENO_stencil(p_j, S_j, el_idx,
                                                            max_jobs=max_ENO_jobs)
        # store polynomial orders and stencils
        self.p_j = p_j
        self.S_j = S_j

        print("Updated polynomials orders = %s" % p_j)

        # compute the simplex probabilities
        self.prob_j = self.sampler.compute_probability()

Update the SSC surrogate given new data. Given an EasyVVUQ dataframe, check the LEC condition, and compute the ENO interpolation stencils.

Parameters
  • qoi (string): The name of the QoI on the basis of which the sampling plan is refined.
  • data_frame (EasyVVUQ (pandas) data frame): The code samples from the EasyVVUQ data frame.
  • max_LEC_jobs (int, optional): The number of LEC checks to perform in parallel. The default is 4.
  • n_mc_LEC (int, optional): The number of surrogate evaluations used in the LEC check. The default is 5.
  • max_ENO_jobs (int, optional): The number of ENO stencils to compute in parallel. The default is 4.
Returns
  • None. Stores the polynomials orders, interpolation stencils and
  • the simplex probabilities in analysis.p_j, analysis.S_j and
  • analysis.prob_j respectively.
def adapt_locally(self, n_new_samples=1):
331    def adapt_locally(self, n_new_samples=1):
332        """
333        Locally refine the sampling plan based on the SSC geometric
334        refinement measure.
335
336        Parameters
337        ----------
338        n_new_samples : int, optional
339            The number of new code evaulations to perform. The default is 1.
340
341        Returns
342        -------
343        None. Updates the Delaunay triangulation of the SSC sampler with
344        the new points. A new ensemble must be executed next.
345
346        """
347
348        if n_new_samples > self.sampler.n_elements:
349            n_new_samples = self.sampler.n_elements
350
351        # compute the refinement measures
352        eps_bar_j, vol_j = self.sampler.compute_eps_bar_j(self.p_j, self.prob_j)
353
354        # rank elements according to eps_bar_j
355        refine_idx = np.flipud(np.argsort(eps_bar_j))
356
357        # find the elements at the hypercube boundaries
358        bound_simplices = self.sampler.find_boundary_simplices()
359
360        # store which edges are refined, to prevent refining the same edge twice
361        # during the same interation
362        refined_edges = []
363
364        # the refinement locations
365        xi_k_jref = np.zeros([n_new_samples, self.sampler.n_dimensions])
366
367        i = 0
368        j = 0
369
370        # determine n_new_samples locations
371        while i < n_new_samples:
372
373            if np.in1d(refine_idx[j], bound_simplices) == False:
374                # compute the subvertices of the element chosen for refinement
375                sub_vertices = self.sampler.compute_sub_simplex_vertices(refine_idx[j])
376                # draw a random sample from the sub simplex
377                xi_k_jref[i, :] = self.sampler.sample_simplex(1, sub_vertices)
378                # if interior is refined: no danger of refining the same edge twice
379                already_refined = False
380            else:
381                print('refining edge')
382
383                xi_k_jref[i, :], refined_edges, already_refined =  \
384                    self.sampler.sample_simplex_edge(refine_idx[j], refined_edges)
385
386            if not already_refined:
387                i += 1
388                j += 1
389            else:
390                print('Edge already refined, selecting another sample.')
391                j += 1
392
393        self.sampler.update_Delaunay(xi_k_jref)

Locally refine the sampling plan based on the SSC geometric refinement measure.

Parameters
  • n_new_samples (int, optional): The number of new code evaluations to perform. The default is 1.
Returns
  • None. Updates the Delaunay triangulation of the SSC sampler with
  • the new points. A new ensemble must be executed next.
def surrogate(self, qoi, xi):
395    def surrogate(self, qoi, xi):
396        """
397        Evaluate the SSC surrogate at xi.
398
399        Parameters
400        ----------
401        qoi : string
402            Name of the QoI.
403        xi : array, shape (n_xi,)
404            The location in the input space at which to evaluate the
405            surrogate.
406
407        Returns
408        -------
409        array
410            The surrogate output at xi
411
412        """
413
414        if not isinstance(xi, np.ndarray):
415            xi = np.array([xi])
416
417        surr = self.sampler.surrogate(xi, self.S_j, self.p_j, self.samples[qoi])
418        return np.array([surr])

Evaluate the SSC surrogate at xi.

Parameters
  • qoi (string): Name of the QoI.
  • xi (array, shape (n_xi,)): The location in the input space at which to evaluate the surrogate.
Returns
  • array: The surrogate output at xi
def get_sample_array(self, qoi):
420    def get_sample_array(self, qoi):
421        """
422        Parameters
423        ----------
424        qoi : str
425            name of quantity of interest
426
427        Returns
428        -------
429        array of all samples of qoi
430        """
431
432        return self.samples[qoi]
Parameters
  • qoi (str): name of quantity of interest
Returns
  • array of all samples of qoi
def plot_grid(self):
434    def plot_grid(self):
435        """
436        Plot the 1D or 2D sampling plan and color code the simplices according
437        to their polynomial order.
438
439        Returns
440        -------
441        None.
442
443        """
444
445        assert self.sampler.n_xi == 1 or self.sampler.n_xi == 2, \
446            "Only works for 1D and 2D problems"
447
448        tri = self.sampler.tri
449        colors = ["#8dd3c7", "#ffffb3", "#bebada", "#fb8072", "#80b1d3",
450                  "#fdb462", "#b3de69", "#fccde5", "#d9d9d9"]
451
452        if self.sampler.n_xi == 2:
453            # plot the delaunay grid and color it according to the local p_j
454            fig = plt.figure()
455            ax = fig.add_subplot(111, xlabel=r'$\xi_1$', ylabel=r'$\xi_2$',
456                                 xlim=[self.sampler.corners[0][0],
457                                       self.sampler.corners[0][1]],
458                                 ylim=[self.sampler.corners[1][0],
459                                       self.sampler.corners[1][1]])
460
461            for p in range(np.max(self.p_j)):
462                idx = (self.p_j == p + 1).nonzero()[0]
463                first = True
464                for i in idx:
465                    vertices = tri.points[tri.simplices[i]]
466                    if first:
467                        pg = Polygon(vertices, facecolor=colors[p], edgecolor='k',
468                                     label=r'$p_j = %d$' % (p + 1))
469                        first = False
470                    else:
471                        pg = Polygon(vertices, facecolor=colors[p], edgecolor='k')
472
473                    ax.add_patch(pg)
474
475            leg = plt.legend(loc=0)
476            leg.set_draggable(True)
477
478        elif self.sampler.n_xi == 1:
479            fig = plt.figure()
480            ax = fig.add_subplot(111, xlabel='cell center',
481                                 ylabel=r'polynomial order %s' % '$p_j$')
482            ax.plot(self.sampler.compute_xi_center_j(), self.p_j, 'o')
483            ax.vlines(self.sampler.compute_xi_center_j(), np.zeros(self.sampler.n_elements),
484                      self.p_j, linestyles='dashed')
485            ax.vlines(self.sampler.tri.points, -0.05 * np.max(self.p_j),
486                      0.05 * np.max(self.p_j))
487
488        plt.tight_layout()
489        plt.show()

Plot the 1D or 2D sampling plan and color code the simplices according to their polynomial order.

Returns
  • None.