easyvvuq.analysis.basic_stats

Provides an analysis element for basic statistical analysis.

The analysis is based on the pandas.DataFrame.describe() function.

  1"""Provides analysis element for basic statistical analysis.
  2
  3The analysis is based on `pandas.DataFrame.describe()` function.
  4"""
  5from easyvvuq import OutputType
  6from .base import BaseAnalysisElement
  7
  8__copyright__ = """
  9
 10    Copyright 2018 Robin A. Richardson, David W. Wright
 11
 12    This file is part of EasyVVUQ
 13
 14    EasyVVUQ is free software: you can redistribute it and/or modify
 15    it under the terms of the Lesser GNU General Public License as published by
 16    the Free Software Foundation, either version 3 of the License, or
 17    (at your option) any later version.
 18
 19    EasyVVUQ is distributed in the hope that it will be useful,
 20    but WITHOUT ANY WARRANTY; without even the implied warranty of
 21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 22    Lesser GNU General Public License for more details.
 23
 24    You should have received a copy of the Lesser GNU General Public License
 25    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 26
 27"""
 28__license__ = "LGPL"
 29
 30
 31class BasicStats(BaseAnalysisElement):
 32
 33    def __init__(self, groupby=None, qoi_cols=None):
 34        """Element to calculate basic stats for `qoi_cols` values.
 35
 36        This results in values for: count, mean, std, min, max and 25%, 50% &
 37        75% percentiles for each value in the analysis.
 38
 39        Parameters
 40        ----------
 41        groupby : list or None
 42            Columns to use to group the data in `analyse` method before
 43            calculating stats.
 44        qoi_cols : list or None
 45            Columns of quantities of interest (for which stats will be
 46            calculated).
 47        """
 48        self.groupby = groupby
 49        if qoi_cols is not None:
 50            self.qoi_cols = qoi_cols
 51        else:
 52            self.qoi_cols = []
 53        self.output_type = OutputType.SUMMARY
 54
 55    def element_name(self):
 56        """Name for this element for logging purposes"""
 57        return "basic_stats"
 58
 59    def element_version(self):
 60        """Version of this element for logging purposes"""
 61        return "0.1"
 62
 63    def analyse(self, data_frame=None):
 64        """Perform the basis stats analysis on the input `data_frame`.
 65
 66        Analysis is based on `pandas.Dataframe.describe` and results in
 67        values for: count, mean, std, min, max and 25%, 50% & 75% percentiles
 68        for each value in the analysis.
 69
 70        The data_frame is grouped according to `self.groupby` if specified and
 71        analysis is performed on the columns selected in `self.qoi_cols` if set.
 72
 73        Parameters
 74        ----------
 75        data_frame : :obj:`pandas.DataFrame`
 76            Summary data produced through collation of simulation output.
 77
 78        Returns
 79        -------
 80        :obj:`pandas.DataFrame`
 81            Basic statistic for selected columns and groupings of data.
 82        """
 83
 84        qoi_cols = self.qoi_cols
 85
 86        if data_frame is None:
 87            raise RuntimeError("Analysis element needs a data frame to "
 88                               "analyse")
 89        elif data_frame.empty:
 90            raise RuntimeError(
 91                "No data in data frame passed to analyse element")
 92
 93        # Get summary statistics
 94        if self.groupby:
 95            grouped_data = data_frame.groupby(self.groupby)
 96            results = grouped_data.describe()
 97            if qoi_cols:
 98                results = results[qoi_cols]
 99
100        else:
101            if qoi_cols:
102                results = data_frame[qoi_cols].describe()
103            else:
104                results = data_frame.describe()
105
106        return results
class BasicStats(easyvvuq.analysis.base.BaseAnalysisElement):
 32class BasicStats(BaseAnalysisElement):
 33
 34    def __init__(self, groupby=None, qoi_cols=None):
 35        """Element to calculate basic stats for `qoi_cols` values.
 36
 37        This results in values for: count, mean, std, min, max and 25%, 50% &
 38        75% percentiles for each value in the analysis.
 39
 40        Parameters
 41        ----------
 42        groupby : list or None
 43            Columns to use to group the data in `analyse` method before
 44            calculating stats.
 45        qoi_cols : list or None
 46            Columns of quantities of interest (for which stats will be
 47            calculated).
 48        """
 49        self.groupby = groupby
 50        if qoi_cols is not None:
 51            self.qoi_cols = qoi_cols
 52        else:
 53            self.qoi_cols = []
 54        self.output_type = OutputType.SUMMARY
 55
 56    def element_name(self):
 57        """Name for this element for logging purposes"""
 58        return "basic_stats"
 59
 60    def element_version(self):
 61        """Version of this element for logging purposes"""
 62        return "0.1"
 63
 64    def analyse(self, data_frame=None):
 65        """Perform the basis stats analysis on the input `data_frame`.
 66
 67        Analysis is based on `pandas.Dataframe.describe` and results in
 68        values for: count, mean, std, min, max and 25%, 50% & 75% percentiles
 69        for each value in the analysis.
 70
 71        The data_frame is grouped according to `self.groupby` if specified and
 72        analysis is performed on the columns selected in `self.qoi_cols` if set.
 73
 74        Parameters
 75        ----------
 76        data_frame : :obj:`pandas.DataFrame`
 77            Summary data produced through collation of simulation output.
 78
 79        Returns
 80        -------
 81        :obj:`pandas.DataFrame`
 82            Basic statistic for selected columns and groupings of data.
 83        """
 84
 85        qoi_cols = self.qoi_cols
 86
 87        if data_frame is None:
 88            raise RuntimeError("Analysis element needs a data frame to "
 89                               "analyse")
 90        elif data_frame.empty:
 91            raise RuntimeError(
 92                "No data in data frame passed to analyse element")
 93
 94        # Get summary statistics
 95        if self.groupby:
 96            grouped_data = data_frame.groupby(self.groupby)
 97            results = grouped_data.describe()
 98            if qoi_cols:
 99                results = results[qoi_cols]
100
101        else:
102            if qoi_cols:
103                results = data_frame[qoi_cols].describe()
104            else:
105                results = data_frame.describe()
106
107        return results

Base class for all EasyVVUQ analysis elements.

Attributes

BasicStats(groupby=None, qoi_cols=None)
34    def __init__(self, groupby=None, qoi_cols=None):
35        """Element to calculate basic stats for `qoi_cols` values.
36
37        This results in values for: count, mean, std, min, max and 25%, 50% &
38        75% percentiles for each value in the analysis.
39
40        Parameters
41        ----------
42        groupby : list or None
43            Columns to use to group the data in `analyse` method before
44            calculating stats.
45        qoi_cols : list or None
46            Columns of quantities of interest (for which stats will be
47            calculated).
48        """
49        self.groupby = groupby
50        if qoi_cols is not None:
51            self.qoi_cols = qoi_cols
52        else:
53            self.qoi_cols = []
54        self.output_type = OutputType.SUMMARY

Element to calculate basic stats for qoi_cols values.

This results in values for: count, mean, std, min, max and 25%, 50% & 75% percentiles for each value in the analysis.

Parameters
  • groupby (list or None): Columns to use to group the data in analyse method before calculating stats.
  • qoi_cols (list or None): Columns of quantities of interest (for which stats will be calculated).
groupby
output_type
def element_name(self):
56    def element_name(self):
57        """Name for this element for logging purposes"""
58        return "basic_stats"

Name for this element for logging purposes

def element_version(self):
60    def element_version(self):
61        """Version of this element for logging purposes"""
62        return "0.1"

Version of this element for logging purposes

def analyse(self, data_frame=None):
 64    def analyse(self, data_frame=None):
 65        """Perform the basis stats analysis on the input `data_frame`.
 66
 67        Analysis is based on `pandas.Dataframe.describe` and results in
 68        values for: count, mean, std, min, max and 25%, 50% & 75% percentiles
 69        for each value in the analysis.
 70
 71        The data_frame is grouped according to `self.groupby` if specified and
 72        analysis is performed on the columns selected in `self.qoi_cols` if set.
 73
 74        Parameters
 75        ----------
 76        data_frame : :obj:`pandas.DataFrame`
 77            Summary data produced through collation of simulation output.
 78
 79        Returns
 80        -------
 81        :obj:`pandas.DataFrame`
 82            Basic statistic for selected columns and groupings of data.
 83        """
 84
 85        qoi_cols = self.qoi_cols
 86
 87        if data_frame is None:
 88            raise RuntimeError("Analysis element needs a data frame to "
 89                               "analyse")
 90        elif data_frame.empty:
 91            raise RuntimeError(
 92                "No data in data frame passed to analyse element")
 93
 94        # Get summary statistics
 95        if self.groupby:
 96            grouped_data = data_frame.groupby(self.groupby)
 97            results = grouped_data.describe()
 98            if qoi_cols:
 99                results = results[qoi_cols]
100
101        else:
102            if qoi_cols:
103                results = data_frame[qoi_cols].describe()
104            else:
105                results = data_frame.describe()
106
107        return results

Perform the basic stats analysis on the input data_frame.

Analysis is based on pandas.DataFrame.describe and results in values for: count, mean, std, min, max and 25%, 50% & 75% percentiles for each value in the analysis.

The data_frame is grouped according to self.groupby if specified and analysis is performed on the columns selected in self.qoi_cols if set.

Parameters
  • data_frame (pandas.DataFrame): Summary data produced through collation of simulation output.
Returns
  • pandas.DataFrame: Basic statistics for the selected columns and groupings of data.