easyvvuq.analysis.ensemble_boot

Provides analysis element for ensemble bootstrapping analysis.

  1"""Provides analysis element for ensemble bootstrapping analysis.
  2"""
  3import numpy as np
  4import pandas as pd
  5from easyvvuq import OutputType
  6from .base import BaseAnalysisElement
  7
  8__copyright__ = """
  9
 10    Copyright 2018 Robin A. Richardson, David W. Wright
 11
 12    This file is part of EasyVVUQ
 13
 14    EasyVVUQ is free software: you can redistribute it and/or modify
 15    it under the terms of the Lesser GNU General Public License as published by
 16    the Free Software Foundation, either version 3 of the License, or
 17    (at your option) any later version.
 18
 19    EasyVVUQ is distributed in the hope that it will be useful,
 20    but WITHOUT ANY WARRANTY; without even the implied warranty of
 21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 22    Lesser GNU General Public License for more details.
 23
 24    You should have received a copy of the Lesser GNU General Public License
 25    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 26
 27"""
 28__license__ = "LGPL"
 29
 30
 31def confidence_interval(dist, value, alpha, pivotal=False):
 32    """
 33    Get the bootstrap confidence interval for a given distribution.
 34
 35    Parameters
 36    ----------
 37    dist:
 38        Array containing distribution of bootstrap results.
 39    value:
 40        Value of statistic for which we are calculating error bars.
 41    alpha:
 42        The alpha value for the confidence intervals.
 43    pivotal:
 44        Use the pivotal method? Default to percentile method.
 45
 46    Returns
 47    -------
 48
 49    float:
 50          Value of the bootstrap statistic
 51    float:
 52          Highest value of the confidence interval
 53    float:
 54          Lowest value of the confidence interval
 55
 56    """
 57    if len(dist) < 1:
 58        raise ValueError("Dist array should be non-empty")
 59
 60    if pivotal:
 61
 62        low = 2 * value - np.percentile(dist, 100 * (1 - alpha / 2.), axis=0)
 63        stat = value
 64        high = 2 * value - np.percentile(dist, 100 * (alpha / 2.), axis=0)
 65
 66    else:
 67
 68        low = np.percentile(dist, 100 * (alpha / 2.), axis=0)
 69        stat = np.percentile(dist, 50)
 70        high = np.percentile(dist, 100 * (1 - alpha / 2.), axis=0)
 71
 72    # if low > high:
 73    #     (low, high) = (high, low)
 74
 75    return stat, low, high
 76
 77
 78def bootstrap(data, stat_func, alpha=0.05,
 79              sample_size=None, n_samples=1000,
 80              pivotal=False):
 81    """
 82
 83    Parameters
 84    ----------
 85    data : :obj:`pandas.DataFrame`
 86        Input data to be analysed.
 87    stat_func : function
 88        Statistical function to be applied to data for bootstrapping.
 89    alpha : float
 90        Produce estimate of 100.0*(1-`alpha`) confidence interval.
 91    sample_size : int
 92        Size of the sample to be drawn from the input data.
 93    n_samples : int
 94        Number of times samples are to be drawn from the input data.
 95    pivotal : bool
 96        Use the pivotal method? Default to percentile method.
 97
 98    Returns
 99    -------
100    float:
101          Value of the bootstrap statistic
102    float:
103          Highest value of the confidence interval
104    float:
105          Lowest value of the confidence interval
106    """
107    if data.empty:
108        raise RuntimeError("DataFrame passed to bootstrap has to be non-empty")
109
110    stat = data.apply(stat_func)
111
112    if sample_size is None:
113        sample_size = len(data)
114
115    dist = []
116
117    for l in range(n_samples):
118
119        sample = data.sample(sample_size, replace=True)
120
121        dist.append(stat_func(sample))
122
123    return confidence_interval(dist, stat, alpha, pivotal=pivotal)
124
125
def ensemble_bootstrap(data, groupby=[], qoi_cols=[],
                       stat_func=np.mean, alpha=0.05,
                       sample_size=None, n_samples=1000,
                       pivotal=False, stat_name='boot'):
    """
    Perform bootstrapping analysis on input data.

    Parameters
    ----------
    data : :obj:`pandas.DataFrame`
        DataFrame to be analysed.
    groupby : list or None
        Columns to use to group the data in `analyse` method before
        calculating stats.
    qoi_cols : list or None
        Columns of quantities of interest (for which stats will be
        calculated). If empty or None, every column except the `groupby`
        columns, 'run_id' and 'status' is used.
    stat_func : function
        Statistical function to be applied to data for bootstrapping.
    alpha : float, default=0.05
        Produce estimate of 100.0*(1-`alpha`) confidence interval.
    sample_size : int
        Size of the sample to be drawn from the input data.
    n_samples : int, default=1000
        Number of times samples are to be drawn from the input data.
    pivotal : bool, default=False
        Use the pivotal method? Default to percentile method.
    stat_name : str, default='boot'
        Name to use to describe columns containing output statistic (for example
        'mean').

    Returns
    -------
    :obj:`pandas.DataFrame`
        Description of input data using bootstrap statistic and high/low
        confidence intervals.

    Raises
    ------
    RuntimeError
        If a requested quantity of interest is not a column of `data`.
    """

    # None is a documented value for `groupby`; treat it as "no grouping".
    # The default lists are only read, never mutated.
    if groupby is None:
        groupby = []

    if not qoi_cols:
        qoi_cols = [
            x for x in data.columns if x not in groupby + ['run_id', 'status']]

    for col in qoi_cols:
        if col not in data:
            # Report the available column names, not the whole frame.
            raise RuntimeError(
                f"No such attribute: {col}\n"
                f"Attributes found in data: {list(data.columns)}")

    def boot_func(x):
        """Bootstrap a single column of a single group."""
        return bootstrap(
            x,
            stat_func=stat_func,
            alpha=alpha,
            sample_size=sample_size,
            n_samples=n_samples,
            pivotal=pivotal)

    agg_funcs = {col: boot_func for col in qoi_cols}

    if not groupby:
        # A constant key places every row in one single group.
        grouped_data = data.groupby(lambda x: True, sort=False)
    else:
        grouped_data = data.groupby(groupby, sort=False)

    # Apply bootstrapping to all value columns selected
    # Note results come as a tuple per cell
    results = grouped_data.agg(agg_funcs)

    outputs = [stat_name, 'low', 'high']

    # Split out tuples in each cell and provide sensible naming
    results = pd.concat({col: results[col].apply(
        lambda cell: pd.Series(cell, index=outputs)
    )
        for col in qoi_cols}, axis=1)

    return results
199
200
class EnsembleBoot(BaseAnalysisElement):
    """Analysis element that bootstraps one statistic over collated output."""

    def __init__(self, groupby=[], qoi_cols=[],
                 stat_func=np.mean, alpha=0.05,
                 sample_size=None, n_boot_samples=1000,
                 pivotal=False, stat_name='boot'):
        """
        Store the settings that `analyse` later hands to the bootstrap.

        Parameters
        ----------
        groupby : list or None
            Columns used to group the data in `analyse` before the
            statistics are calculated.
        qoi_cols : list or None
            Columns holding the quantities of interest, i.e. those for
            which statistics are calculated.
        stat_func : function
            Statistical function applied to the data when bootstrapping.
        alpha : float, default=0.05
            The estimated confidence interval is the 100.0*(1-`alpha`) one.
        sample_size : int
            Number of rows drawn from the input data for each sample.
        n_boot_samples : int, default=1000
            How many samples are drawn from the input data.
        pivotal : bool, default=False
            Select the pivotal method instead of the percentile method.
        stat_name : str, default='boot'
            Label of the output columns holding the statistic (for example
            'mean').

        Raises
        ------
        ValueError
            If `stat_func` is None.
        """

        # Which rows are grouped together and which columns are analysed.
        self.groupby, self.qoi_cols = groupby, qoi_cols

        # Settings of the bootstrap itself.
        self.stat_func, self.alpha = stat_func, alpha
        self.sample_size, self.n_boot_samples = sample_size, n_boot_samples
        self.pivotal, self.stat_name = pivotal, stat_name

        self.output_type = OutputType.SUMMARY

        if stat_func is None:
            raise ValueError('stat_func cannot be None.')

    def element_name(self):
        """Return the name identifying this element in logs."""
        return "ensemble_boot"

    def element_version(self):
        """Return the version of this element reported in logs."""
        return "0.1"

    def analyse(self, data_frame=None):
        """Run the bootstrap analysis on `data_frame`.

        Rows are grouped by `self.groupby` when it is set, and the columns
        named in `self.qoi_cols` are analysed when it is set.

        Parameters
        ----------
        data_frame : :obj:`pandas.DataFrame`
            Summary data produced through collation of simulation output.

        Returns
        -------
        :obj:`pandas.DataFrame`
            Bootstrap statistic and confidence bounds for the selected
            columns and groupings of the data.

        Raises
        ------
        RuntimeError
            If `data_frame` is None or empty.
        """

        if data_frame is None:
            raise RuntimeError(
                "This VVUQ element needs a data frame to analyse")
        if data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        settings = dict(
            groupby=self.groupby,
            qoi_cols=self.qoi_cols,
            stat_func=self.stat_func,
            alpha=self.alpha,
            sample_size=self.sample_size,
            n_samples=self.n_boot_samples,
            pivotal=self.pivotal,
            stat_name=self.stat_name)

        return ensemble_bootstrap(data_frame, **settings)
292
class EnsembleBootMultiple(BaseAnalysisElement):
    """Analysis element that bootstraps several statistics in one pass."""

    def __init__(self, groupby=[], qoi_cols=[],
                 stat_func=[np.mean], alpha=0.05,
                 sample_size=None, n_boot_samples=1000,
                 pivotal=False, stat_name=None):
        """
        Element to perform bootstrapping on collated simulation output.

        Parameters
        ----------
        groupby : list or None
            Columns to use to group the data in `analyse` method before
            calculating stats.
        qoi_cols : list or None
            Columns of quantities of interest (for which stats will be
            calculated).
        stat_func : list[function]
            List of statistical functions to be applied to data for
            bootstrapping. A single function is treated as a one-element
            list.
        alpha : float, default=0.05
            Produce estimate of 100.0*(1-`alpha`) confidence interval.
        sample_size : int
            Size of the sample to be drawn from the input data.
        n_boot_samples : int, default=1000
            Number of times samples are to be drawn from the input data.
        pivotal : bool, default=False
            Use the pivotal method? Default to percentile method.
        stat_name : list[str] or str, default=None
            Names to use to describe columns containing output statistics
            (for example 'mean'), one per entry of `stat_func`. A single
            string is treated as a one-element list. If not provided, then
            attr '__name__' from each func is used.

        Raises
        ------
        ValueError
            If `stat_func` is empty or None, or if the number of names
            differs from the number of functions.
        """

        if callable(stat_func):
            stat_func = [stat_func]
        if not stat_func:
            raise ValueError('stat_func cannot be empty or None')

        if stat_name is None:
            stat_name = [func.__name__ for func in stat_func]
        elif isinstance(stat_name, str):
            # Iterating a bare string would pair functions with its
            # individual characters.
            stat_name = [stat_name]

        # `analyse` pairs functions and names with zip, which would drop
        # unmatched entries without any warning.
        if len(stat_name) != len(stat_func):
            raise ValueError(
                'stat_name must provide exactly one name per function '
                'in stat_func')

        self.groupby = groupby
        self.qoi_cols = qoi_cols

        self.stat_func = stat_func
        self.alpha = alpha
        self.sample_size = sample_size
        self.n_boot_samples = n_boot_samples
        self.pivotal = pivotal
        self.stat_name = stat_name

        self.output_type = OutputType.SUMMARY

    def element_name(self):
        """Name for this element for logging purposes"""
        return "ensemble_boot_multiple"

    def element_version(self):
        """Version of this element for logging purposes"""
        return "0.1"

    def analyse(self, data_frame=None):
        """Perform bootstrapping analysis on the input `data_frame`.

        The data_frame is grouped according to `self.groupby` if specified and
        analysis is performed on the columns selected in `self.qoi_cols` if set.
        The bootstrap is run once per function in `self.stat_func`.

        Parameters
        ----------
        data_frame : :obj:`pandas.DataFrame`
            Summary data produced through collation of simulation output.

        Returns
        -------
        :obj:`pandas.DataFrame`
            Bootstrap results of every statistic, joined column-wise, for
            the selected columns and groupings of data.

        Raises
        ------
        RuntimeError
            If `data_frame` is None or empty.
        """

        if data_frame is None:
            raise RuntimeError(
                "This VVUQ element needs a data frame to analyse")
        if data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        frames = [
            ensemble_bootstrap(
                data_frame,
                groupby=self.groupby,
                qoi_cols=self.qoi_cols,
                stat_func=stat_func,
                alpha=self.alpha,
                sample_size=self.sample_size,
                n_samples=self.n_boot_samples,
                pivotal=self.pivotal,
                stat_name=stat_name)
            for stat_func, stat_name in zip(self.stat_func, self.stat_name)]

        # The statistic name becomes the outermost column level; swap it
        # with the next level so the quantity of interest comes first.
        return pd.concat(frames, axis=1, keys=self.stat_name).swaplevel(0, 1, axis=1)
def confidence_interval(dist, value, alpha, pivotal=False):
def confidence_interval(dist, value, alpha, pivotal=False):
    """
    Get the bootstrap confidence interval for a given distribution.

    Parameters
    ----------
    dist:
        Array containing distribution of bootstrap results. Percentiles are
        taken along the first axis, so each entry may be a scalar or an
        array holding one statistic per quantity.
    value:
        Value of statistic for which we are calculating error bars. Only
        used by the pivotal method.
    alpha:
        The alpha value for the confidence intervals.
    pivotal:
        Use the pivotal method? Default to percentile method.

    Returns
    -------
    stat:
        Value of the bootstrap statistic: `value` for the pivotal method,
        the median of `dist` for the percentile method.
    low:
        Lowest value of the confidence interval
    high:
        Highest value of the confidence interval

    Raises
    ------
    ValueError
        If `dist` is empty.
    """
    if len(dist) < 1:
        raise ValueError("Dist array should be non-empty")

    lower_pct = np.percentile(dist, 100 * (alpha / 2.), axis=0)
    upper_pct = np.percentile(dist, 100 * (1 - alpha / 2.), axis=0)

    if pivotal:
        # Pivotal interval: reflect the bootstrap percentiles about `value`,
        # which swaps the roles of the upper and lower percentile.
        return value, 2 * value - upper_pct, 2 * value - lower_pct

    # The median is taken along the same axis as the bounds so that all
    # three return values have the same shape.
    return np.percentile(dist, 50, axis=0), lower_pct, upper_pct

Get the bootstrap confidence interval for a given distribution.

Parameters
  • dist:: Array containing distribution of bootstrap results.
  • value:: Value of statistic for which we are calculating error bars.
  • alpha:: The alpha value for the confidence intervals.
  • pivotal:: Use the pivotal method? Default to percentile method.
Returns
  • float:: Value of the bootstrap statistic
  • float:: Lowest value of the confidence interval
  • float:: Highest value of the confidence interval
def bootstrap( data, stat_func, alpha=0.05, sample_size=None, n_samples=1000, pivotal=False):
 79def bootstrap(data, stat_func, alpha=0.05,
 80              sample_size=None, n_samples=1000,
 81              pivotal=False):
 82    """
 83
 84    Parameters
 85    ----------
 86    data : :obj:`pandas.DataFrame`
 87        Input data to be analysed.
 88    stat_func : function
 89        Statistical function to be applied to data for bootstrapping.
 90    alpha : float
 91        Produce estimate of 100.0*(1-`alpha`) confidence interval.
 92    sample_size : int
 93        Size of the sample to be drawn from the input data.
 94    n_samples : int
 95        Number of times samples are to be drawn from the input data.
 96    pivotal : bool
 97        Use the pivotal method? Default to percentile method.
 98
 99    Returns
100    -------
101    float:
102          Value of the bootstrap statistic
103    float:
104          Highest value of the confidence interval
105    float:
106          Lowest value of the confidence interval
107    """
108    if data.empty:
109        raise RuntimeError("DataFrame passed to bootstrap has to be non-empty")
110
111    stat = data.apply(stat_func)
112
113    if sample_size is None:
114        sample_size = len(data)
115
116    dist = []
117
118    for l in range(n_samples):
119
120        sample = data.sample(sample_size, replace=True)
121
122        dist.append(stat_func(sample))
123
124    return confidence_interval(dist, stat, alpha, pivotal=pivotal)
Parameters
  • data (pandas.DataFrame): Input data to be analysed.
  • stat_func (function): Statistical function to be applied to data for bootstrapping.
  • alpha (float): Produce estimate of 100.0*(1-alpha) confidence interval.
  • sample_size (int): Size of the sample to be drawn from the input data.
  • n_samples (int): Number of times samples are to be drawn from the input data.
  • pivotal (bool): Use the pivotal method? Default to percentile method.
Returns
  • float:: Value of the bootstrap statistic
  • float:: Lowest value of the confidence interval
  • float:: Highest value of the confidence interval
def ensemble_bootstrap( data, groupby=[], qoi_cols=[], stat_func=<function mean>, alpha=0.05, sample_size=None, n_samples=1000, pivotal=False, stat_name='boot'):
def ensemble_bootstrap(data, groupby=[], qoi_cols=[],
                       stat_func=np.mean, alpha=0.05,
                       sample_size=None, n_samples=1000,
                       pivotal=False, stat_name='boot'):
    """
    Perform bootstrapping analysis on input data.

    Parameters
    ----------
    data : :obj:`pandas.DataFrame`
        DataFrame to be analysed.
    groupby : list or None
        Columns to use to group the data in `analyse` method before
        calculating stats.
    qoi_cols : list or None
        Columns of quantities of interest (for which stats will be
        calculated). If empty or None, every column except the `groupby`
        columns, 'run_id' and 'status' is used.
    stat_func : function
        Statistical function to be applied to data for bootstrapping.
    alpha : float, default=0.05
        Produce estimate of 100.0*(1-`alpha`) confidence interval.
    sample_size : int
        Size of the sample to be drawn from the input data.
    n_samples : int, default=1000
        Number of times samples are to be drawn from the input data.
    pivotal : bool, default=False
        Use the pivotal method? Default to percentile method.
    stat_name : str, default='boot'
        Name to use to describe columns containing output statistic (for example
        'mean').

    Returns
    -------
    :obj:`pandas.DataFrame`
        Description of input data using bootstrap statistic and high/low
        confidence intervals.

    Raises
    ------
    RuntimeError
        If a requested quantity of interest is not a column of `data`.
    """

    # None is a documented value for `groupby`; treat it as "no grouping".
    # The default lists are only read, never mutated.
    if groupby is None:
        groupby = []

    if not qoi_cols:
        qoi_cols = [
            x for x in data.columns if x not in groupby + ['run_id', 'status']]

    for col in qoi_cols:
        if col not in data:
            # Report the available column names, not the whole frame.
            raise RuntimeError(
                f"No such attribute: {col}\n"
                f"Attributes found in data: {list(data.columns)}")

    def boot_func(x):
        """Bootstrap a single column of a single group."""
        return bootstrap(
            x,
            stat_func=stat_func,
            alpha=alpha,
            sample_size=sample_size,
            n_samples=n_samples,
            pivotal=pivotal)

    agg_funcs = {col: boot_func for col in qoi_cols}

    if not groupby:
        # A constant key places every row in one single group.
        grouped_data = data.groupby(lambda x: True, sort=False)
    else:
        grouped_data = data.groupby(groupby, sort=False)

    # Apply bootstrapping to all value columns selected
    # Note results come as a tuple per cell
    results = grouped_data.agg(agg_funcs)

    outputs = [stat_name, 'low', 'high']

    # Split out tuples in each cell and provide sensible naming
    results = pd.concat({col: results[col].apply(
        lambda cell: pd.Series(cell, index=outputs)
    )
        for col in qoi_cols}, axis=1)

    return results

Perform bootstrapping analysis on input data.

Parameters
  • data (pandas.DataFrame): DataFrame to be analysed.
  • groupby (list or None): Columns to use to group the data in analyse method before calculating stats.
  • qoi_cols (list or None): Columns of quantities of interest (for which stats will be calculated).
  • stat_func (function): Statistical function to be applied to data for bootstrapping.
  • alpha (float, default=0.05): Produce estimate of 100.0*(1-alpha) confidence interval.
  • sample_size (int): Size of the sample to be drawn from the input data.
  • n_samples (int, default=1000): Number of times samples are to be drawn from the input data.
  • pivotal (bool, default=False): Use the pivotal method? Default to percentile method.
  • stat_name (str, default='boot'): Name to use to describe columns containing output statistic (for example 'mean').
Returns
  • pandas.DataFrame: Description of input data using bootstrap statistic and high/low confidence intervals.
class EnsembleBoot(easyvvuq.analysis.base.BaseAnalysisElement):
class EnsembleBoot(BaseAnalysisElement):
    """Analysis element that bootstraps one statistic over collated output."""

    def __init__(self, groupby=[], qoi_cols=[],
                 stat_func=np.mean, alpha=0.05,
                 sample_size=None, n_boot_samples=1000,
                 pivotal=False, stat_name='boot'):
        """
        Store the settings that `analyse` later hands to the bootstrap.

        Parameters
        ----------
        groupby : list or None
            Columns used to group the data in `analyse` before the
            statistics are calculated.
        qoi_cols : list or None
            Columns holding the quantities of interest, i.e. those for
            which statistics are calculated.
        stat_func : function
            Statistical function applied to the data when bootstrapping.
        alpha : float, default=0.05
            The estimated confidence interval is the 100.0*(1-`alpha`) one.
        sample_size : int
            Number of rows drawn from the input data for each sample.
        n_boot_samples : int, default=1000
            How many samples are drawn from the input data.
        pivotal : bool, default=False
            Select the pivotal method instead of the percentile method.
        stat_name : str, default='boot'
            Label of the output columns holding the statistic (for example
            'mean').

        Raises
        ------
        ValueError
            If `stat_func` is None.
        """

        # Which rows are grouped together and which columns are analysed.
        self.groupby, self.qoi_cols = groupby, qoi_cols

        # Settings of the bootstrap itself.
        self.stat_func, self.alpha = stat_func, alpha
        self.sample_size, self.n_boot_samples = sample_size, n_boot_samples
        self.pivotal, self.stat_name = pivotal, stat_name

        self.output_type = OutputType.SUMMARY

        if stat_func is None:
            raise ValueError('stat_func cannot be None.')

    def element_name(self):
        """Return the name identifying this element in logs."""
        return "ensemble_boot"

    def element_version(self):
        """Return the version of this element reported in logs."""
        return "0.1"

    def analyse(self, data_frame=None):
        """Run the bootstrap analysis on `data_frame`.

        Rows are grouped by `self.groupby` when it is set, and the columns
        named in `self.qoi_cols` are analysed when it is set.

        Parameters
        ----------
        data_frame : :obj:`pandas.DataFrame`
            Summary data produced through collation of simulation output.

        Returns
        -------
        :obj:`pandas.DataFrame`
            Bootstrap statistic and confidence bounds for the selected
            columns and groupings of the data.

        Raises
        ------
        RuntimeError
            If `data_frame` is None or empty.
        """

        if data_frame is None:
            raise RuntimeError(
                "This VVUQ element needs a data frame to analyse")
        if data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        settings = dict(
            groupby=self.groupby,
            qoi_cols=self.qoi_cols,
            stat_func=self.stat_func,
            alpha=self.alpha,
            sample_size=self.sample_size,
            n_samples=self.n_boot_samples,
            pivotal=self.pivotal,
            stat_name=self.stat_name)

        return ensemble_bootstrap(data_frame, **settings)

Base class for all EasyVVUQ analysis elements.

Attributes

EnsembleBoot( groupby=[], qoi_cols=[], stat_func=<function mean>, alpha=0.05, sample_size=None, n_boot_samples=1000, pivotal=False, stat_name='boot')
204    def __init__(self, groupby=[], qoi_cols=[],
205                 stat_func=np.mean, alpha=0.05,
206                 sample_size=None, n_boot_samples=1000,
207                 pivotal=False, stat_name='boot'):
208        """
209        Element to perform bootstrapping on collated simulation output.
210
211        Parameters
212        ----------
213        groupby : list or None
214            Columns to use to group the data in `analyse` method before
215            calculating stats.
216        qoi_cols : list or None
217            Columns of quantities of interest (for which stats will be
218            calculated).
219        stat_func : function
220            Statistical function to be applied to data for bootstrapping.
221        alpha : float, default=0.05
222            Produce estimate of 100.0*(1-`alpha`) confidence interval.
223        sample_size : int
224            Size of the sample to be drawn from the input data.
225        n_boot_samples : int, default=1000
226            Number of times samples are to be drawn from the input data.
227        pivotal : bool, default=False
228            Use the pivotal method? Default to percentile method.
229        stat_name : str, default='boot'
230            Name to use to describe columns containing output statistic (for example
231            'mean').
232        """
233
234        self.groupby = groupby
235        self.qoi_cols = qoi_cols
236
237        self.stat_func = stat_func
238        self.alpha = alpha
239        self.sample_size = sample_size
240        self.n_boot_samples = n_boot_samples
241        self.pivotal = pivotal
242        self.stat_name = stat_name
243
244        self.output_type = OutputType.SUMMARY
245
246        if self.stat_func is None:
247            raise ValueError('stat_func cannot be None.')

Element to perform bootstrapping on collated simulation output.

Parameters
  • groupby (list or None): Columns to use to group the data in analyse method before calculating stats.
  • qoi_cols (list or None): Columns of quantities of interest (for which stats will be calculated).
  • stat_func (function): Statistical function to be applied to data for bootstrapping.
  • alpha (float, default=0.05): Produce estimate of 100.0*(1-alpha) confidence interval.
  • sample_size (int): Size of the sample to be drawn from the input data.
  • n_boot_samples (int, default=1000): Number of times samples are to be drawn from the input data.
  • pivotal (bool, default=False): Use the pivotal method? Default to percentile method.
  • stat_name (str, default='boot'): Name to use to describe columns containing output statistic (for example 'mean').
groupby
qoi_cols
stat_func
alpha
sample_size
n_boot_samples
pivotal
stat_name
output_type
def element_name(self):
    def element_name(self):
        """Name for this element for logging purposes.

        Returns
        -------
        str
            The fixed identifier "ensemble_boot".
        """
        return "ensemble_boot"

Name for this element for logging purposes

def element_version(self):
    def element_version(self):
        """Version of this element for logging purposes.

        Returns
        -------
        str
            The fixed version string "0.1".
        """
        return "0.1"

Version of this element for logging purposes

def analyse(self, data_frame=None):
257    def analyse(self, data_frame=None):
258        """Perform bootstrapping analysis on the input `data_frame`.
259
260        The data_frame is grouped according to `self.groupby` if specified and
261        analysis is performed on the columns selected in `self.qoi_cols` if set.
262
263        Parameters
264        ----------
265        data_frame : :obj:`pandas.DataFrame`
266            Summary data produced through collation of simulation output.
267
268        Returns
269        -------
270        :obj:`pandas.DataFrame`
271            Basic statistic for selected columns and groupings of data.
272        """
273
274        if data_frame is None:
275            raise RuntimeError(
276                "This VVUQ element needs a data frame to analyse")
277        elif data_frame.empty:
278            raise RuntimeError(
279                "No data in data frame passed to analyse element")
280
281        results = ensemble_bootstrap(
282            data_frame,
283            groupby=self.groupby,
284            qoi_cols=self.qoi_cols,
285            stat_func=self.stat_func,
286            alpha=self.alpha,
287            sample_size=self.sample_size,
288            n_samples=self.n_boot_samples,
289            pivotal=self.pivotal,
290            stat_name=self.stat_name)
291
292        return results

Perform bootstrapping analysis on the input data_frame.

The data_frame is grouped according to self.groupby if specified and analysis is performed on the columns selected in self.qoi_cols if set.

Parameters
  • data_frame (pandas.DataFrame): Summary data produced through collation of simulation output.
Returns
  • pandas.DataFrame: Basic statistic for selected columns and groupings of data.
class EnsembleBootMultiple(easyvvuq.analysis.base.BaseAnalysisElement):
class EnsembleBootMultiple(BaseAnalysisElement):
    """Analysis element that bootstraps several statistics in one call.

    Each function in `stat_func` is passed to `ensemble_bootstrap` in turn
    and `analyse` joins the per-statistic results column-wise.
    """

    def __init__(self, groupby=[], qoi_cols=[],
                 stat_func=[np.mean], alpha=0.05,
                 sample_size=None, n_boot_samples=1000,
                 pivotal=False, stat_name=None):
        """
        Element to perform bootstrapping on collated simulation output.

        Parameters
        ----------
        groupby : list or None
            Columns to use to group the data in `analyse` method before
            calculating stats.
        qoi_cols : list or None
            Columns of quantities of interest (for which stats will be
            calculated).
        stat_func : list[function]
            List of statistical functions to be applied to data for bootstrapping.
        alpha : float, default=0.05
            Produce estimate of 100.0*(1-`alpha`) confidence interval.
        sample_size : int, default=None
            Size of the sample to be drawn from the input data.
        n_boot_samples : int, default=1000
            Number of times samples are to be drawn from the input data.
        pivotal : bool, default=False
            Use the pivotal method? Default to percentile method.
        stat_name : list[str] or str, default=None
            Names to use to describe columns containing the output statistics
            (for example ['mean']), one per function in `stat_func`. A single
            string is treated as a one-element list. If not provided, then
            attr '__name__' from each func is used.

        Raises
        ------
        ValueError
            If `stat_func` is empty or None, or if `stat_name` does not
            provide exactly one name per function in `stat_func`.
        """

        if not stat_func:
            raise ValueError('stat_func cannot be empty or None')

        # The list defaults in the signature are created once and shared by
        # every call; store copies so that mutating an instance attribute
        # cannot leak into other instances (or into the caller's own list).
        self.groupby = list(groupby) if isinstance(groupby, list) else groupby
        self.qoi_cols = list(qoi_cols) if isinstance(qoi_cols, list) else qoi_cols
        self.stat_func = list(stat_func)

        self.alpha = alpha
        self.sample_size = sample_size
        self.n_boot_samples = n_boot_samples
        self.pivotal = pivotal

        if stat_name is None:
            stat_name = [func.__name__ for func in self.stat_func]
        elif isinstance(stat_name, str):
            # A bare string would otherwise be zipped character by character.
            stat_name = [stat_name]
        else:
            stat_name = list(stat_name)
        # `analyse` pairs functions and names with zip(), which would
        # silently drop the unmatched tail of the longer sequence.
        if len(stat_name) != len(self.stat_func):
            raise ValueError(
                'stat_name must provide one name per function in stat_func')
        self.stat_name = stat_name

        self.output_type = OutputType.SUMMARY

    def element_name(self):
        """Name for this element for logging purposes"""
        return "ensemble_boot_multiple"

    def element_version(self):
        """Version of this element for logging purposes"""
        return "0.1"

    def analyse(self, data_frame=None):
        """Perform bootstrapping analysis on the input `data_frame`.

        `ensemble_bootstrap` is called once per (function, name) pair taken
        from `self.stat_func` and `self.stat_name`, using `self.groupby` and
        `self.qoi_cols` for grouping and column selection.

        Parameters
        ----------
        data_frame : :obj:`pandas.DataFrame`
            Summary data produced through collation of simulation output.

        Returns
        -------
        :obj:`pandas.DataFrame`
            Per-statistic results concatenated column-wise under the
            matching `stat_name` key, with column levels 0 and 1 swapped so
            the statistic name becomes the second column level.

        Raises
        ------
        RuntimeError
            If `data_frame` is None or holds no data.
        """

        if data_frame is None:
            raise RuntimeError(
                "This VVUQ element needs a data frame to analyse")
        if data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        frames = [
            ensemble_bootstrap(
                data_frame,
                groupby=self.groupby,
                qoi_cols=self.qoi_cols,
                stat_func=func,
                alpha=self.alpha,
                sample_size=self.sample_size,
                n_samples=self.n_boot_samples,
                pivotal=self.pivotal,
                stat_name=name)
            for func, name in zip(self.stat_func, self.stat_name)
        ]
        return pd.concat(frames, axis=1, keys=self.stat_name).swaplevel(0, 1, axis=1)

Base class for all EasyVVUQ analysis elements.

Attributes

EnsembleBootMultiple( groupby=[], qoi_cols=[], stat_func=[<function mean>], alpha=0.05, sample_size=None, n_boot_samples=1000, pivotal=False, stat_name=None)
296    def __init__(self, groupby=[], qoi_cols=[],
297                 stat_func=[np.mean], alpha=0.05,
298                 sample_size=None, n_boot_samples=1000,
299                 pivotal=False, stat_name=None):
300        """
301        Element to perform bootstrapping on collated simulation output.
302
303        Parameters
304        ----------
305        groupby : list or None
306            Columns to use to group the data in `analyse` method before
307            calculating stats.
308        qoi_cols : list or None
309            Columns of quantities of interest (for which stats will be
310            calculated).
311        stat_func : list[function]
312            List of statistical functions to be applied to data for bootstrapping.
313        alpha : float, default=0.05
314            Produce estimate of 100.0*(1-`alpha`) confidence interval.
315        sample_size : int
316            Size of the sample to be drawn from the input data.
317        n_boot_samples : int, default=1000
318            Number of times samples are to be drawn from the input data.
319        pivotal : bool, default=False
320            Use the pivotal method? Default to percentile method.
321        stat_name : str, default=None
322            Name to use to describe columns containing output statistic (for example
323            'mean'). If not provided, then attr '__name__' from each func is used.
324        """
325
326        if not stat_func or stat_func is None:
327            raise ValueError('stat_func cannot be empty or None')
328
329        self.groupby = groupby
330        self.qoi_cols = qoi_cols
331
332        self.stat_func = stat_func
333        self.alpha = alpha
334        self.sample_size = sample_size
335        self.n_boot_samples = n_boot_samples
336        self.pivotal = pivotal
337        self.stat_name = stat_name if stat_name is not None else [func.__name__ for func in stat_func]
338
339        self.output_type = OutputType.SUMMARY

Element to perform bootstrapping on collated simulation output.

Parameters
  • groupby (list or None): Columns to use to group the data in analyse method before calculating stats.
  • qoi_cols (list or None): Columns of quantities of interest (for which stats will be calculated).
  • stat_func (list[function]): List of statistical functions to be applied to data for bootstrapping.
  • alpha (float, default=0.05): Produce estimate of 100.0*(1-alpha) confidence interval.
  • sample_size (int): Size of the sample to be drawn from the input data.
  • n_boot_samples (int, default=1000): Number of times samples are to be drawn from the input data.
  • pivotal (bool, default=False): Use the pivotal method? Default to percentile method.
  • stat_name (list[str], default=None): Names to use to describe columns containing the output statistics (for example ['mean']), one per function in stat_func. If not provided, then attr '__name__' from each func is used.
groupby
qoi_cols
stat_func
alpha
sample_size
n_boot_samples
pivotal
stat_name
output_type
def element_name(self):
341    def element_name(self):
342        """Name for this element for logging purposes"""
343        return "ensemble_boot_multiple"

Name for this element for logging purposes

def element_version(self):
345    def element_version(self):
346        """Version of this element for logging purposes"""
347        return "0.1"

Version of this element for logging purposes

def analyse(self, data_frame=None):
349    def analyse(self, data_frame=None):
350        """Perform bootstrapping analysis on the input `data_frame`.
351
352        The data_frame is grouped according to `self.groupby` if specified and
353        analysis is performed on the columns selected in `self.qoi_cols` if set.
354
355        Parameters
356        ----------
357        data_frame : :obj:`pandas.DataFrame`
358            Summary data produced through collation of simulation output.
359
360        Returns
361        -------
362        :obj:`pandas.DataFrame`
363            Basic statistic for selected columns and groupings of data.
364        """
365
366        if data_frame is None:
367            raise RuntimeError(
368                    "This VVUQ element needs a data frame to analyse")
369        elif data_frame.empty:
370            raise RuntimeError(
371                    "No data in data frame passed to analyse element")
372        frames = []
373        for stat_func, stat_name in zip(self.stat_func, self.stat_name):
374            results = ensemble_bootstrap(
375                    data_frame,
376                    groupby=self.groupby,
377                    qoi_cols=self.qoi_cols,
378                    stat_func=stat_func,
379                    alpha=self.alpha,
380                    sample_size=self.sample_size,
381                    n_samples=self.n_boot_samples,
382                    pivotal=self.pivotal,
383                    stat_name=stat_name)
384            frames.append(results)
385        return pd.concat(frames, axis=1, keys=self.stat_name).swaplevel(0, 1, axis=1)

Perform bootstrapping analysis on the input data_frame.

The data_frame is grouped according to self.groupby if specified and analysis is performed on the columns selected in self.qoi_cols if set.

Parameters
  • data_frame (pandas.DataFrame): Summary data produced through collation of simulation output.
Returns
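  • pandas.DataFrame: Bootstrap results for each function in stat_func, concatenated column-wise under the matching stat_name key, with column levels 0 and 1 swapped so that the statistic name becomes the second column level.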