easyvvuq.analysis.ensemble_boot
Provides analysis element for ensemble bootstrapping analysis.
"""Provides analysis element for ensemble bootstrapping analysis.
"""
import numpy as np
import pandas as pd
from easyvvuq import OutputType
from .base import BaseAnalysisElement

__copyright__ = """

    Copyright 2018 Robin A. Richardson, David W. Wright

    This file is part of EasyVVUQ

    EasyVVUQ is free software: you can redistribute it and/or modify
    it under the terms of the Lesser GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EasyVVUQ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    Lesser GNU General Public License for more details.

    You should have received a copy of the Lesser GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
__license__ = "LGPL"


def confidence_interval(dist, value, alpha, pivotal=False):
    """
    Get the bootstrap confidence interval for a given distribution.

    Parameters
    ----------
    dist:
        Array containing distribution of bootstrap results. The first axis
        indexes the bootstrap replicates.
    value:
        Value of statistic for which we are calculating error bars. Only
        used by the pivotal method.
    alpha:
        The alpha value for the confidence intervals.
    pivotal:
        Use the pivotal method? Default to percentile method.

    Returns
    -------
    stat:
        Value of the bootstrap statistic: `value` for the pivotal method,
        the median of `dist` along axis 0 for the percentile method.
    low:
        Lowest value of the confidence interval
    high:
        Highest value of the confidence interval

    Raises
    ------
    ValueError
        If `dist` is empty.
    """
    if len(dist) < 1:
        raise ValueError("Dist array should be non-empty")

    lower_pct = 100 * (alpha / 2.)
    upper_pct = 100 * (1 - alpha / 2.)

    if pivotal:
        # The pivotal interval mirrors the percentiles about `value`, hence
        # the upper percentile produces the lower bound and vice versa.
        low = 2 * value - np.percentile(dist, upper_pct, axis=0)
        stat = value
        high = 2 * value - np.percentile(dist, lower_pct, axis=0)
    else:
        low = np.percentile(dist, lower_pct, axis=0)
        # Reduce along axis 0 like the bounds, so that multi-column input
        # gives one median per column rather than one over all values.
        stat = np.percentile(dist, 50, axis=0)
        high = np.percentile(dist, upper_pct, axis=0)

    return stat, low, high


def bootstrap(data, stat_func, alpha=0.05,
              sample_size=None, n_samples=1000,
              pivotal=False):
    """
    Estimate a statistic and its confidence interval by bootstrapping.

    `n_samples` resamples of `sample_size` rows are drawn from `data` with
    replacement, `stat_func` is evaluated on each of them and the resulting
    distribution is passed to `confidence_interval`.

    Parameters
    ----------
    data : :obj:`pandas.DataFrame`
        Input data to be analysed.
    stat_func : function
        Statistical function to be applied to data for bootstrapping.
    alpha : float
        Produce estimate of 100.0*(1-`alpha`) confidence interval.
    sample_size : int
        Size of the sample to be drawn from the input data. Defaults to the
        number of rows in `data`.
    n_samples : int
        Number of times samples are to be drawn from the input data.
    pivotal : bool
        Use the pivotal method? Default to percentile method.

    Returns
    -------
    stat:
        Value of the bootstrap statistic
    low:
        Lowest value of the confidence interval
    high:
        Highest value of the confidence interval

    Raises
    ------
    RuntimeError
        If `data` is empty.
    """
    if data.empty:
        raise RuntimeError("DataFrame passed to bootstrap has to be non-empty")

    # Statistic of the full data set; only the pivotal method uses it.
    # NOTE(review): for a Series, `apply` with a non-ufunc callable works
    # element-wise - confirm this is the intended value in that case.
    stat = data.apply(stat_func)

    if sample_size is None:
        sample_size = len(data)

    dist = [stat_func(data.sample(sample_size, replace=True))
            for _ in range(n_samples)]

    return confidence_interval(dist, stat, alpha, pivotal=pivotal)


def ensemble_bootstrap(data, groupby=None, qoi_cols=None,
                       stat_func=np.mean, alpha=0.05,
                       sample_size=None, n_samples=1000,
                       pivotal=False, stat_name='boot'):
    """
    Perform bootstrapping analysis on input data.

    Parameters
    ----------
    data : :obj:`pandas.DataFrame`
        DataFrame to be analysed.
    groupby : list or None
        Columns to use to group the data before calculating stats. If empty
        or None all rows are treated as a single group.
    qoi_cols : list or None
        Columns of quantities of interest (for which stats will be
        calculated). If empty or None every column except the `groupby`
        columns, 'run_id' and 'status' is used.
    stat_func : function
        Statistical function to be applied to data for bootstrapping.
    alpha : float, default=0.05
        Produce estimate of 100.0*(1-`alpha`) confidence interval.
    sample_size : int
        Size of the sample to be drawn from the input data.
    n_samples : int, default=1000
        Number of times samples are to be drawn from the input data.
    pivotal : bool, default=False
        Use the pivotal method? Default to percentile method.
    stat_name : str, default='boot'
        Name to use to describe columns containing output statistic (for example
        'mean').

    Returns
    -------
    :obj:`pandas.DataFrame`
        Description of input data using bootstrap statistic and high/low
        confidence intervals.

    Raises
    ------
    RuntimeError
        If a column named in `qoi_cols` is not present in `data`.
    """
    if groupby is None:
        groupby = []
    # List form of the grouping columns, used only to exclude them below.
    group_cols = [groupby] if isinstance(groupby, str) else list(groupby)

    if not qoi_cols:
        excluded = group_cols + ['run_id', 'status']
        qoi_cols = [x for x in data.columns if x not in excluded]

    for col in qoi_cols:
        if col not in data:
            raise RuntimeError(
                f"No such attribute: {col}\n"
                f"Attributes found in data: {list(data.columns)}")

    def _boot(column):
        # Bootstrap a single column of a single group.
        return bootstrap(
            column,
            stat_func=stat_func,
            alpha=alpha,
            sample_size=sample_size,
            n_samples=n_samples,
            pivotal=pivotal)

    agg_funcs = {col: _boot for col in qoi_cols}

    if not groupby:
        # A constant key places every row in one group.
        grouped_data = data.groupby(lambda x: True, sort=False)
    else:
        grouped_data = data.groupby(groupby, sort=False)

    # Apply bootstrapping to all value columns selected
    # Note results come as a tuple per cell
    results = grouped_data.agg(agg_funcs)

    outputs = [stat_name, 'low', 'high']

    # Split out tuples in each cell and provide sensible naming
    results = pd.concat({col: results[col].apply(
        lambda cell: pd.Series(cell, index=outputs)
    )
        for col in qoi_cols}, axis=1)

    return results


class EnsembleBoot(BaseAnalysisElement):
    """Analysis element bootstrapping a single statistic of collated output."""

    def __init__(self, groupby=None, qoi_cols=None,
                 stat_func=np.mean, alpha=0.05,
                 sample_size=None, n_boot_samples=1000,
                 pivotal=False, stat_name='boot'):
        """
        Element to perform bootstrapping on collated simulation output.

        Parameters
        ----------
        groupby : list or None
            Columns to use to group the data in `analyse` method before
            calculating stats.
        qoi_cols : list or None
            Columns of quantities of interest (for which stats will be
            calculated).
        stat_func : function
            Statistical function to be applied to data for bootstrapping.
        alpha : float, default=0.05
            Produce estimate of 100.0*(1-`alpha`) confidence interval.
        sample_size : int
            Size of the sample to be drawn from the input data.
        n_boot_samples : int, default=1000
            Number of times samples are to be drawn from the input data.
        pivotal : bool, default=False
            Use the pivotal method? Default to percentile method.
        stat_name : str, default='boot'
            Name to use to describe columns containing output statistic (for example
            'mean').

        Raises
        ------
        ValueError
            If `stat_func` is None.
        """
        if stat_func is None:
            raise ValueError('stat_func cannot be None.')

        # Fresh lists per instance, so no default is shared between elements.
        self.groupby = [] if groupby is None else groupby
        self.qoi_cols = [] if qoi_cols is None else qoi_cols

        self.stat_func = stat_func
        self.alpha = alpha
        self.sample_size = sample_size
        self.n_boot_samples = n_boot_samples
        self.pivotal = pivotal
        self.stat_name = stat_name

        self.output_type = OutputType.SUMMARY

    def element_name(self):
        """Name for this element for logging purposes"""
        return "ensemble_boot"

    def element_version(self):
        """Version of this element for logging purposes"""
        return "0.1"

    def analyse(self, data_frame=None):
        """Perform bootstrapping analysis on the input `data_frame`.

        The data_frame is grouped according to `self.groupby` if specified and
        analysis is performed on the columns selected in `self.qoi_cols` if set.

        Parameters
        ----------
        data_frame : :obj:`pandas.DataFrame`
            Summary data produced through collation of simulation output.

        Returns
        -------
        :obj:`pandas.DataFrame`
            Basic statistic for selected columns and groupings of data.

        Raises
        ------
        RuntimeError
            If `data_frame` is None or empty.
        """
        if data_frame is None:
            raise RuntimeError(
                "This VVUQ element needs a data frame to analyse")
        if data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        return ensemble_bootstrap(
            data_frame,
            groupby=self.groupby,
            qoi_cols=self.qoi_cols,
            stat_func=self.stat_func,
            alpha=self.alpha,
            sample_size=self.sample_size,
            n_samples=self.n_boot_samples,
            pivotal=self.pivotal,
            stat_name=self.stat_name)


class EnsembleBootMultiple(BaseAnalysisElement):
    """Analysis element bootstrapping several statistics of collated output."""

    def __init__(self, groupby=None, qoi_cols=None,
                 stat_func=(np.mean,), alpha=0.05,
                 sample_size=None, n_boot_samples=1000,
                 pivotal=False, stat_name=None):
        """
        Element to perform bootstrapping on collated simulation output.

        Parameters
        ----------
        groupby : list or None
            Columns to use to group the data in `analyse` method before
            calculating stats.
        qoi_cols : list or None
            Columns of quantities of interest (for which stats will be
            calculated).
        stat_func : list[function]
            List of statistical functions to be applied to data for bootstrapping.
            A single function is treated as a one-element list.
        alpha : float, default=0.05
            Produce estimate of 100.0*(1-`alpha`) confidence interval.
        sample_size : int
            Size of the sample to be drawn from the input data.
        n_boot_samples : int, default=1000
            Number of times samples are to be drawn from the input data.
        pivotal : bool, default=False
            Use the pivotal method? Default to percentile method.
        stat_name : list[str] or str, default=None
            Names to use to describe columns containing output statistics (for
            example 'mean'), one per function. If not provided, then attr
            '__name__' from each func is used.

        Raises
        ------
        ValueError
            If `stat_func` is empty or None, or if the number of names does
            not match the number of functions.
        """
        if callable(stat_func):
            stat_func = [stat_func]
        if not stat_func:
            raise ValueError('stat_func cannot be empty or None')
        stat_func = list(stat_func)

        if stat_name is None:
            stat_name = [func.__name__ for func in stat_func]
        elif isinstance(stat_name, str):
            # A bare string would otherwise be zipped character by character.
            stat_name = [stat_name]
        else:
            stat_name = list(stat_name)

        if len(stat_name) != len(stat_func):
            raise ValueError(
                'stat_name must provide one name per stat_func '
                f'({len(stat_name)} names for {len(stat_func)} functions)')

        # Fresh lists per instance, so no default is shared between elements.
        self.groupby = [] if groupby is None else groupby
        self.qoi_cols = [] if qoi_cols is None else qoi_cols

        self.stat_func = stat_func
        self.alpha = alpha
        self.sample_size = sample_size
        self.n_boot_samples = n_boot_samples
        self.pivotal = pivotal
        self.stat_name = stat_name

        self.output_type = OutputType.SUMMARY

    def element_name(self):
        """Name for this element for logging purposes"""
        return "ensemble_boot_multiple"

    def element_version(self):
        """Version of this element for logging purposes"""
        return "0.1"

    def analyse(self, data_frame=None):
        """Perform bootstrapping analysis on the input `data_frame`.

        The data_frame is grouped according to `self.groupby` if specified and
        analysis is performed on the columns selected in `self.qoi_cols` if set.
        One bootstrap analysis is run per function in `self.stat_func`.

        Parameters
        ----------
        data_frame : :obj:`pandas.DataFrame`
            Summary data produced through collation of simulation output.

        Returns
        -------
        :obj:`pandas.DataFrame`
            Basic statistic for selected columns and groupings of data.

        Raises
        ------
        RuntimeError
            If `data_frame` is None or empty.
        """
        if data_frame is None:
            raise RuntimeError(
                "This VVUQ element needs a data frame to analyse")
        if data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        frames = [
            ensemble_bootstrap(
                data_frame,
                groupby=self.groupby,
                qoi_cols=self.qoi_cols,
                stat_func=func,
                alpha=self.alpha,
                sample_size=self.sample_size,
                n_samples=self.n_boot_samples,
                pivotal=self.pivotal,
                stat_name=name)
            for func, name in zip(self.stat_func, self.stat_name)
        ]
        # Key each frame by its statistic name, then move that level below
        # the quantity-of-interest level of the column index.
        return pd.concat(frames, axis=1, keys=self.stat_name).swaplevel(0, 1, axis=1)
def confidence_interval(dist, value, alpha, pivotal=False):
    """
    Get the bootstrap confidence interval for a given distribution.

    Parameters
    ----------
    dist:
        Array containing distribution of bootstrap results. The first axis
        indexes the bootstrap replicates.
    value:
        Value of statistic for which we are calculating error bars. Only
        used by the pivotal method.
    alpha:
        The alpha value for the confidence intervals.
    pivotal:
        Use the pivotal method? Default to percentile method.

    Returns
    -------
    stat:
        Value of the bootstrap statistic: `value` for the pivotal method,
        the median of `dist` along axis 0 for the percentile method.
    low:
        Lowest value of the confidence interval
    high:
        Highest value of the confidence interval

    Raises
    ------
    ValueError
        If `dist` is empty.
    """
    if len(dist) < 1:
        raise ValueError("Dist array should be non-empty")

    lower_pct = 100 * (alpha / 2.)
    upper_pct = 100 * (1 - alpha / 2.)

    if pivotal:
        # The pivotal interval mirrors the percentiles about `value`, hence
        # the upper percentile produces the lower bound and vice versa.
        low = 2 * value - np.percentile(dist, upper_pct, axis=0)
        stat = value
        high = 2 * value - np.percentile(dist, lower_pct, axis=0)
    else:
        low = np.percentile(dist, lower_pct, axis=0)
        # Reduce along axis 0 like the bounds, so that multi-column input
        # gives one median per column rather than one over all values.
        stat = np.percentile(dist, 50, axis=0)
        high = np.percentile(dist, upper_pct, axis=0)

    return stat, low, high
Get the bootstrap confidence interval for a given distribution.
Parameters
- dist:: Array containing distribution of bootstrap results.
- value:: Value of statistic for which we are calculating error bars.
- alpha:: The alpha value for the confidence intervals.
- pivotal:: Use the pivotal method? Default to percentile method.
Returns
- float:: Value of the bootstrap statistic
- float:: Lowest value of the confidence interval
- float:: Highest value of the confidence interval
def bootstrap(data, stat_func, alpha=0.05,
              sample_size=None, n_samples=1000,
              pivotal=False):
    """
    Estimate a statistic and its confidence interval by bootstrapping.

    `n_samples` resamples of `sample_size` rows are drawn from `data` with
    replacement, `stat_func` is evaluated on each of them and the resulting
    distribution is passed to `confidence_interval`.

    Parameters
    ----------
    data : :obj:`pandas.DataFrame`
        Input data to be analysed.
    stat_func : function
        Statistical function to be applied to data for bootstrapping.
    alpha : float
        Produce estimate of 100.0*(1-`alpha`) confidence interval.
    sample_size : int
        Size of the sample to be drawn from the input data. Defaults to the
        number of rows in `data`.
    n_samples : int
        Number of times samples are to be drawn from the input data.
    pivotal : bool
        Use the pivotal method? Default to percentile method.

    Returns
    -------
    stat:
        Value of the bootstrap statistic
    low:
        Lowest value of the confidence interval
    high:
        Highest value of the confidence interval

    Raises
    ------
    RuntimeError
        If `data` is empty.
    """
    if data.empty:
        raise RuntimeError("DataFrame passed to bootstrap has to be non-empty")

    # Statistic of the full data set; only the pivotal method uses it.
    # NOTE(review): for a Series, `apply` with a non-ufunc callable works
    # element-wise - confirm this is the intended value in that case.
    stat = data.apply(stat_func)

    if sample_size is None:
        sample_size = len(data)

    dist = [stat_func(data.sample(sample_size, replace=True))
            for _ in range(n_samples)]

    return confidence_interval(dist, stat, alpha, pivotal=pivotal)
Parameters
- data (pandas.DataFrame): Input data to be analysed.
- stat_func (function): Statistical function to be applied to data for bootstrapping.
- alpha (float): Produce estimate of 100.0*(1-alpha) confidence interval.
- sample_size (int): Size of the sample to be drawn from the input data.
- n_samples (int): Number of times samples are to be drawn from the input data.
- pivotal (bool): Use the pivotal method? Default to percentile method.
Returns
- float:: Value of the bootstrap statistic
- float:: Lowest value of the confidence interval
- float:: Highest value of the confidence interval
def ensemble_bootstrap(data, groupby=None, qoi_cols=None,
                       stat_func=np.mean, alpha=0.05,
                       sample_size=None, n_samples=1000,
                       pivotal=False, stat_name='boot'):
    """
    Perform bootstrapping analysis on input data.

    Parameters
    ----------
    data : :obj:`pandas.DataFrame`
        DataFrame to be analysed.
    groupby : list or None
        Columns to use to group the data before calculating stats. If empty
        or None all rows are treated as a single group.
    qoi_cols : list or None
        Columns of quantities of interest (for which stats will be
        calculated). If empty or None every column except the `groupby`
        columns, 'run_id' and 'status' is used.
    stat_func : function
        Statistical function to be applied to data for bootstrapping.
    alpha : float, default=0.05
        Produce estimate of 100.0*(1-`alpha`) confidence interval.
    sample_size : int
        Size of the sample to be drawn from the input data.
    n_samples : int, default=1000
        Number of times samples are to be drawn from the input data.
    pivotal : bool, default=False
        Use the pivotal method? Default to percentile method.
    stat_name : str, default='boot'
        Name to use to describe columns containing output statistic (for example
        'mean').

    Returns
    -------
    :obj:`pandas.DataFrame`
        Description of input data using bootstrap statistic and high/low
        confidence intervals.

    Raises
    ------
    RuntimeError
        If a column named in `qoi_cols` is not present in `data`.
    """
    if groupby is None:
        groupby = []
    # List form of the grouping columns, used only to exclude them below.
    group_cols = [groupby] if isinstance(groupby, str) else list(groupby)

    if not qoi_cols:
        excluded = group_cols + ['run_id', 'status']
        qoi_cols = [x for x in data.columns if x not in excluded]

    for col in qoi_cols:
        if col not in data:
            raise RuntimeError(
                f"No such attribute: {col}\n"
                f"Attributes found in data: {list(data.columns)}")

    def _boot(column):
        # Bootstrap a single column of a single group.
        return bootstrap(
            column,
            stat_func=stat_func,
            alpha=alpha,
            sample_size=sample_size,
            n_samples=n_samples,
            pivotal=pivotal)

    agg_funcs = {col: _boot for col in qoi_cols}

    if not groupby:
        # A constant key places every row in one group.
        grouped_data = data.groupby(lambda x: True, sort=False)
    else:
        grouped_data = data.groupby(groupby, sort=False)

    # Apply bootstrapping to all value columns selected
    # Note results come as a tuple per cell
    results = grouped_data.agg(agg_funcs)

    outputs = [stat_name, 'low', 'high']

    # Split out tuples in each cell and provide sensible naming
    results = pd.concat({col: results[col].apply(
        lambda cell: pd.Series(cell, index=outputs)
    )
        for col in qoi_cols}, axis=1)

    return results
Perform bootstrapping analysis on input data.
Parameters
- data (pandas.DataFrame): DataFrame to be analysed.
- groupby (list or None): Columns to use to group the data in analyse method before calculating stats.
- qoi_cols (list or None): Columns of quantities of interest (for which stats will be calculated).
- stat_func (function): Statistical function to be applied to data for bootstrapping.
- alpha (float, default=0.05): Produce estimate of 100.0*(1-alpha) confidence interval.
- sample_size (int): Size of the sample to be drawn from the input data.
- n_samples (int, default=1000): Number of times samples are to be drawn from the input data.
- pivotal (bool, default=False): Use the pivotal method? Default to percentile method.
- stat_name (str, default='boot'): Name to use to describe columns containing output statistic (for example 'mean').
Returns
pandas.DataFrame: Description of input data using bootstrap statistic and high/low confidence intervals.
class EnsembleBoot(BaseAnalysisElement):
    """Analysis element bootstrapping a single statistic of collated output."""

    def __init__(self, groupby=None, qoi_cols=None,
                 stat_func=np.mean, alpha=0.05,
                 sample_size=None, n_boot_samples=1000,
                 pivotal=False, stat_name='boot'):
        """
        Element to perform bootstrapping on collated simulation output.

        Parameters
        ----------
        groupby : list or None
            Columns to use to group the data in `analyse` method before
            calculating stats.
        qoi_cols : list or None
            Columns of quantities of interest (for which stats will be
            calculated).
        stat_func : function
            Statistical function to be applied to data for bootstrapping.
        alpha : float, default=0.05
            Produce estimate of 100.0*(1-`alpha`) confidence interval.
        sample_size : int
            Size of the sample to be drawn from the input data.
        n_boot_samples : int, default=1000
            Number of times samples are to be drawn from the input data.
        pivotal : bool, default=False
            Use the pivotal method? Default to percentile method.
        stat_name : str, default='boot'
            Name to use to describe columns containing output statistic (for example
            'mean').

        Raises
        ------
        ValueError
            If `stat_func` is None.
        """
        # Validate before assigning any state.
        if stat_func is None:
            raise ValueError('stat_func cannot be None.')

        # Fresh lists per instance, so no default is shared between elements.
        self.groupby = [] if groupby is None else groupby
        self.qoi_cols = [] if qoi_cols is None else qoi_cols

        self.stat_func = stat_func
        self.alpha = alpha
        self.sample_size = sample_size
        self.n_boot_samples = n_boot_samples
        self.pivotal = pivotal
        self.stat_name = stat_name

        self.output_type = OutputType.SUMMARY

    def element_name(self):
        """Name for this element for logging purposes"""
        return "ensemble_boot"

    def element_version(self):
        """Version of this element for logging purposes"""
        return "0.1"

    def analyse(self, data_frame=None):
        """Perform bootstrapping analysis on the input `data_frame`.

        The data_frame is grouped according to `self.groupby` if specified and
        analysis is performed on the columns selected in `self.qoi_cols` if set.

        Parameters
        ----------
        data_frame : :obj:`pandas.DataFrame`
            Summary data produced through collation of simulation output.

        Returns
        -------
        :obj:`pandas.DataFrame`
            Basic statistic for selected columns and groupings of data.

        Raises
        ------
        RuntimeError
            If `data_frame` is None or empty.
        """
        if data_frame is None:
            raise RuntimeError(
                "This VVUQ element needs a data frame to analyse")
        if data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        return ensemble_bootstrap(
            data_frame,
            groupby=self.groupby,
            qoi_cols=self.qoi_cols,
            stat_func=self.stat_func,
            alpha=self.alpha,
            sample_size=self.sample_size,
            n_samples=self.n_boot_samples,
            pivotal=self.pivotal,
            stat_name=self.stat_name)
Base class for all EasyVVUQ analysis elements.
Attributes
def __init__(self, groupby=None, qoi_cols=None,
             stat_func=np.mean, alpha=0.05,
             sample_size=None, n_boot_samples=1000,
             pivotal=False, stat_name='boot'):
    """
    Element to perform bootstrapping on collated simulation output.

    Parameters
    ----------
    groupby : list or None
        Columns to use to group the data in `analyse` method before
        calculating stats.
    qoi_cols : list or None
        Columns of quantities of interest (for which stats will be
        calculated).
    stat_func : function
        Statistical function to be applied to data for bootstrapping.
    alpha : float, default=0.05
        Produce estimate of 100.0*(1-`alpha`) confidence interval.
    sample_size : int
        Size of the sample to be drawn from the input data.
    n_boot_samples : int, default=1000
        Number of times samples are to be drawn from the input data.
    pivotal : bool, default=False
        Use the pivotal method? Default to percentile method.
    stat_name : str, default='boot'
        Name to use to describe columns containing output statistic (for example
        'mean').

    Raises
    ------
    ValueError
        If `stat_func` is None.
    """
    # Validate before assigning any state.
    if stat_func is None:
        raise ValueError('stat_func cannot be None.')

    # Fresh lists per instance, so no default is shared between elements.
    self.groupby = [] if groupby is None else groupby
    self.qoi_cols = [] if qoi_cols is None else qoi_cols

    self.stat_func = stat_func
    self.alpha = alpha
    self.sample_size = sample_size
    self.n_boot_samples = n_boot_samples
    self.pivotal = pivotal
    self.stat_name = stat_name

    self.output_type = OutputType.SUMMARY
Element to perform bootstrapping on collated simulation output.
Parameters
- groupby (list or None): Columns to use to group the data in analyse method before calculating stats.
- qoi_cols (list or None): Columns of quantities of interest (for which stats will be calculated).
- stat_func (function): Statistical function to be applied to data for bootstrapping.
- alpha (float, default=0.05): Produce estimate of 100.0*(1-alpha) confidence interval.
- sample_size (int): Size of the sample to be drawn from the input data.
- n_boot_samples (int, default=1000): Number of times samples are to be drawn from the input data.
- pivotal (bool, default=False): Use the pivotal method? Default to percentile method.
- stat_name (str, default='boot'): Name to use to describe columns containing output statistic (for example 'mean').
def element_name(self):
    """Return the name identifying this element, used for logging purposes."""
    return "ensemble_boot"
Name for this element for logging purposes
def element_version(self):
    """Return the version string of this element, used for logging purposes."""
    return "0.1"
Version of this element for logging purposes
def analyse(self, data_frame=None):
    """Run the bootstrap analysis configured on this element.

    Rows of `data_frame` are grouped by `self.groupby` (when given) and the
    columns named in `self.qoi_cols` (when given) are analysed by
    `ensemble_bootstrap` using the settings stored on the element.

    Parameters
    ----------
    data_frame : :obj:`pandas.DataFrame`
        Summary data produced through collation of simulation output.

    Returns
    -------
    :obj:`pandas.DataFrame`
        Basic statistic for selected columns and groupings of data.

    Raises
    ------
    RuntimeError
        If `data_frame` is None or empty.
    """
    # Guard clauses: refuse to run without usable input.
    if data_frame is None:
        raise RuntimeError(
            "This VVUQ element needs a data frame to analyse")
    if data_frame.empty:
        raise RuntimeError(
            "No data in data frame passed to analyse element")

    boot_options = {
        'groupby': self.groupby,
        'qoi_cols': self.qoi_cols,
        'stat_func': self.stat_func,
        'alpha': self.alpha,
        'sample_size': self.sample_size,
        'n_samples': self.n_boot_samples,
        'pivotal': self.pivotal,
        'stat_name': self.stat_name,
    }
    return ensemble_bootstrap(data_frame, **boot_options)
Perform bootstrapping analysis on the input data_frame.
The data_frame is grouped according to self.groupby if specified and
analysis is performed on the columns selected in self.qoi_cols if set.
Parameters
- data_frame (pandas.DataFrame): Summary data produced through collation of simulation output.
Returns
pandas.DataFrame: Basic statistic for selected columns and groupings of data.
Inherited Members
class EnsembleBootMultiple(BaseAnalysisElement):
    """Analysis element bootstrapping several statistics of collated output."""

    def __init__(self, groupby=None, qoi_cols=None,
                 stat_func=(np.mean,), alpha=0.05,
                 sample_size=None, n_boot_samples=1000,
                 pivotal=False, stat_name=None):
        """
        Element to perform bootstrapping on collated simulation output.

        Parameters
        ----------
        groupby : list or None
            Columns to use to group the data in `analyse` method before
            calculating stats.
        qoi_cols : list or None
            Columns of quantities of interest (for which stats will be
            calculated).
        stat_func : list[function]
            List of statistical functions to be applied to data for bootstrapping.
            A single function is treated as a one-element list.
        alpha : float, default=0.05
            Produce estimate of 100.0*(1-`alpha`) confidence interval.
        sample_size : int
            Size of the sample to be drawn from the input data.
        n_boot_samples : int, default=1000
            Number of times samples are to be drawn from the input data.
        pivotal : bool, default=False
            Use the pivotal method? Default to percentile method.
        stat_name : list[str] or str, default=None
            Names to use to describe columns containing output statistics (for
            example 'mean'), one per function. If not provided, then attr
            '__name__' from each func is used.

        Raises
        ------
        ValueError
            If `stat_func` is empty or None, or if the number of names does
            not match the number of functions.
        """
        if callable(stat_func):
            stat_func = [stat_func]
        if not stat_func:
            raise ValueError('stat_func cannot be empty or None')
        stat_func = list(stat_func)

        if stat_name is None:
            stat_name = [func.__name__ for func in stat_func]
        elif isinstance(stat_name, str):
            # A bare string would otherwise be zipped character by character.
            stat_name = [stat_name]
        else:
            stat_name = list(stat_name)

        # `analyse` zips names with functions; a mismatch would silently
        # drop the surplus entries, so reject it here.
        if len(stat_name) != len(stat_func):
            raise ValueError(
                'stat_name must provide one name per stat_func '
                f'({len(stat_name)} names for {len(stat_func)} functions)')

        # Fresh lists per instance, so no default is shared between elements.
        self.groupby = [] if groupby is None else groupby
        self.qoi_cols = [] if qoi_cols is None else qoi_cols

        self.stat_func = stat_func
        self.alpha = alpha
        self.sample_size = sample_size
        self.n_boot_samples = n_boot_samples
        self.pivotal = pivotal
        self.stat_name = stat_name

        self.output_type = OutputType.SUMMARY

    def element_name(self):
        """Name for this element for logging purposes"""
        return "ensemble_boot_multiple"

    def element_version(self):
        """Version of this element for logging purposes"""
        return "0.1"

    def analyse(self, data_frame=None):
        """Perform bootstrapping analysis on the input `data_frame`.

        The data_frame is grouped according to `self.groupby` if specified and
        analysis is performed on the columns selected in `self.qoi_cols` if set.
        One bootstrap analysis is run per function in `self.stat_func`.

        Parameters
        ----------
        data_frame : :obj:`pandas.DataFrame`
            Summary data produced through collation of simulation output.

        Returns
        -------
        :obj:`pandas.DataFrame`
            Basic statistic for selected columns and groupings of data.

        Raises
        ------
        RuntimeError
            If `data_frame` is None or empty.
        """
        if data_frame is None:
            raise RuntimeError(
                "This VVUQ element needs a data frame to analyse")
        if data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        frames = [
            ensemble_bootstrap(
                data_frame,
                groupby=self.groupby,
                qoi_cols=self.qoi_cols,
                stat_func=func,
                alpha=self.alpha,
                sample_size=self.sample_size,
                n_samples=self.n_boot_samples,
                pivotal=self.pivotal,
                stat_name=name)
            for func, name in zip(self.stat_func, self.stat_name)
        ]
        # Key each frame by its statistic name, then move that level below
        # the quantity-of-interest level of the column index.
        return pd.concat(frames, axis=1, keys=self.stat_name).swaplevel(0, 1, axis=1)
Base class for all EasyVVUQ analysis elements.
Attributes
def __init__(self, groupby=None, qoi_cols=None,
             stat_func=(np.mean,), alpha=0.05,
             sample_size=None, n_boot_samples=1000,
             pivotal=False, stat_name=None):
    """
    Element to perform bootstrapping on collated simulation output.

    Parameters
    ----------
    groupby : list or None
        Columns to use to group the data in `analyse` method before
        calculating stats.
    qoi_cols : list or None
        Columns of quantities of interest (for which stats will be
        calculated).
    stat_func : list[function]
        List of statistical functions to be applied to data for bootstrapping.
        A single function is treated as a one-element list.
    alpha : float, default=0.05
        Produce estimate of 100.0*(1-`alpha`) confidence interval.
    sample_size : int
        Size of the sample to be drawn from the input data.
    n_boot_samples : int, default=1000
        Number of times samples are to be drawn from the input data.
    pivotal : bool, default=False
        Use the pivotal method? Default to percentile method.
    stat_name : list[str] or str, default=None
        Names to use to describe columns containing output statistics (for
        example 'mean'), one per function. If not provided, then attr
        '__name__' from each func is used.

    Raises
    ------
    ValueError
        If `stat_func` is empty or None, or if the number of names does
        not match the number of functions.
    """
    if callable(stat_func):
        stat_func = [stat_func]
    if not stat_func:
        raise ValueError('stat_func cannot be empty or None')
    stat_func = list(stat_func)

    if stat_name is None:
        stat_name = [func.__name__ for func in stat_func]
    elif isinstance(stat_name, str):
        # A bare string would otherwise be zipped character by character.
        stat_name = [stat_name]
    else:
        stat_name = list(stat_name)

    # Names are later zipped with the functions; a mismatch would silently
    # drop the surplus entries, so reject it here.
    if len(stat_name) != len(stat_func):
        raise ValueError(
            'stat_name must provide one name per stat_func '
            f'({len(stat_name)} names for {len(stat_func)} functions)')

    # Fresh lists per instance, so no default is shared between elements.
    self.groupby = [] if groupby is None else groupby
    self.qoi_cols = [] if qoi_cols is None else qoi_cols

    self.stat_func = stat_func
    self.alpha = alpha
    self.sample_size = sample_size
    self.n_boot_samples = n_boot_samples
    self.pivotal = pivotal
    self.stat_name = stat_name

    self.output_type = OutputType.SUMMARY
Element to perform bootstrapping on collated simulation output.
Parameters
- groupby (list or None): Columns to use to group the data in analyse method before calculating stats.
- qoi_cols (list or None): Columns of quantities of interest (for which stats will be calculated).
- stat_func (list[function]): List of statistical functions to be applied to data for bootstrapping.
- alpha (float, default=0.05): Produce estimate of 100.0*(1-alpha) confidence interval.
- sample_size (int): Size of the sample to be drawn from the input data.
- n_boot_samples (int, default=1000): Number of times samples are to be drawn from the input data.
- pivotal (bool, default=False): Use the pivotal method? Default to percentile method.
- stat_name (list of str, default=None): Names to use to describe columns containing output statistics (for example 'mean'), one per function in stat_func. If not provided, then attr '__name__' from each func is used.
def element_name(self):
    """Return the name identifying this element, used for logging purposes."""
    return "ensemble_boot_multiple"
Name for this element for logging purposes
def element_version(self):
    """Return the version string of this element, used for logging purposes."""
    return "0.1"
Version of this element for logging purposes
def analyse(self, data_frame=None):
    """Run one bootstrap analysis per configured statistic and combine them.

    Rows of `data_frame` are grouped by `self.groupby` (when given) and the
    columns named in `self.qoi_cols` (when given) are analysed by
    `ensemble_bootstrap`, once for each function/name pair taken from
    `self.stat_func` and `self.stat_name`.

    Parameters
    ----------
    data_frame : :obj:`pandas.DataFrame`
        Summary data produced through collation of simulation output.

    Returns
    -------
    :obj:`pandas.DataFrame`
        Basic statistic for selected columns and groupings of data.

    Raises
    ------
    RuntimeError
        If `data_frame` is None or empty.
    """
    # Guard clauses: refuse to run without usable input.
    if data_frame is None:
        raise RuntimeError(
            "This VVUQ element needs a data frame to analyse")
    if data_frame.empty:
        raise RuntimeError(
            "No data in data frame passed to analyse element")

    per_stat_frames = [
        ensemble_bootstrap(
            data_frame,
            groupby=self.groupby,
            qoi_cols=self.qoi_cols,
            stat_func=func,
            alpha=self.alpha,
            sample_size=self.sample_size,
            n_samples=self.n_boot_samples,
            pivotal=self.pivotal,
            stat_name=name)
        for func, name in zip(self.stat_func, self.stat_name)
    ]

    # Key each frame by its statistic name, then swap the two outermost
    # column levels.
    combined = pd.concat(per_stat_frames, axis=1, keys=self.stat_name)
    return combined.swaplevel(0, 1, axis=1)
Perform bootstrapping analysis on the input data_frame.
The data_frame is grouped according to self.groupby if specified and
analysis is performed on the columns selected in self.qoi_cols if set.
Parameters
- data_frame (pandas.DataFrame): Summary data produced through collation of simulation output.
Returns
pandas.DataFrame: Basic statistic for selected columns and groupings of data.