easyvvuq.analysis.mcmc

Analysis element for the Markov Chain Monte Carlo (MCMC) method. For more details on the method see the easyvvuq.sampling.MonteCarloSampler class. The analysis part of Markov Chain Monte Carlo consists of approximating the distribution from the results obtained by evaluating the samples.

  1"""Analysis element for the Markov Chain Monte Carlo (MCMC) method.
  2For more details on the method see the `easyvvuq.sampling.MonteCarloSampler` class.
  3The analysis part of Markov Chain Monte Carlo consists of approximating the distribution
  4from the results obtained by evaluating the samples.
  5"""
  6import pandas as pd
  7from .base import BaseAnalysisElement
  8from .results import AnalysisResults
  9
 10
 11class MCMCAnalysisResults(AnalysisResults):
 12    """The analysis results class for MCMC. You will not need to instantiate this
 13    class manually.
 14
 15    Parameters
 16    ----------
 17    chains: dict
 18        A dictionary with pandas DataFrame that correspond to an MCMC chain each.
 19        A chain consists of points that MCMC has visited. From this a distribution
 20        of the input variables can be constructed by means of a simple histogram.
 21    """
 22
 23    def __init__(self, chains):
 24        self.chains = chains
 25
 26    def plot_hist(self, input_parameter, chain=None, skip=0, merge=True):
 27        """Will plot a histogram for a given input parameter.
 28
 29        Parameters
 30        ----------
 31        input_parameter: str
 32            An input parameter name to draw the histogram for.
 33        chain: int, optional
 34            Index of a chain to be plotted.
 35        skip: int
 36            How many steps to skip (for getting rid of burn-in).
 37        merge: bool
 38            If set to True will use all chains to construct the histogram.
 39        """
 40        import matplotlib.pyplot as plt
 41        input_parameter = (input_parameter, 0)
 42        if merge:
 43            chain_keys = list(self.chains.keys())
 44            df = pd.concat([self.chains[ck][input_parameter].iloc[skip:] for ck in chain_keys])
 45            plt.hist(df, 20)
 46        else:
 47            plt.hist(self.chains[chain][input_parameter].iloc[skip:], 20)
 48
 49    def plot_chains(self, input_parameter, chain=None):
 50        """Will plot the chains with the input parameter value in the y axis.
 51
 52        Parameters
 53        ----------
 54        input_parameter: str
 55            Input parameter name.
 56        chain: int, optional
 57            The chain number of the chain to plot.
 58        """
 59        import matplotlib.pyplot as plt
 60        if chain is None:
 61            for chain in self.chains:
 62                plt.plot(self.chains[chain][(input_parameter, 0)])
 63        else:
 64            plt.plot(self.chains[chain][(input_parameter, 0)])
 65
 66
 67class MCMCAnalysis(BaseAnalysisElement):
 68    """The analysis part of the MCMC method in EasyVVUQ
 69
 70    Parameters
 71    ----------
 72    sampler: MCMCSampler
 73       An instance of MCMCSampler used to generate MCMC samples.
 74    """
 75
 76    def __init__(self, sampler):
 77        self.sampler = sampler
 78
 79    def analyse(self, df):
 80        """Performs some pre-processing on the chains in order to be able to construct
 81        the histograms or other methods of distribution estimation.
 82
 83        Parameters
 84        ----------
 85        df: DataFrame
 86            DataFrame with the results obtained by evaluating the samples generated by the
 87            MCMC sampler.
 88        """
 89        chains = dict([(chain_id, []) for chain_id in df[('chain_id', 0)].unique()])
 90        for chain in chains:
 91            chain_values = df[df[('chain_id', 0)] == chain]
 92            values = chain_values.groupby(('iteration', 0)).apply(lambda x: x.mean())
 93            indexes = values.index.values
 94            for a, b in zip(indexes[:-1], indexes[1:]):
 95                chains[chain] += [values.loc[a][self.sampler.inputs].to_dict()] * (b - a)
 96        for chain in chains:
 97            tmp = dict([(input_, []) for input_ in chains[chain][0]])
 98            for row in chains[chain]:
 99                for input_ in chains[chain][0]:
100                    tmp[input_].append(row[input_])
101            chains[chain] = pd.DataFrame(tmp)
102        return MCMCAnalysisResults(chains)
class MCMCAnalysisResults(easyvvuq.analysis.results.AnalysisResults):
class MCMCAnalysisResults(AnalysisResults):
    """Holds the chains produced by the MCMC analysis and plots them.

    Instances are returned by `MCMCAnalysis.analyse`, so there is normally no
    need to create one by hand.

    Parameters
    ----------
    chains: dict
        Maps a chain identifier to the pandas DataFrame holding that chain.
        A chain consists of the points that MCMC has visited; a histogram of
        them approximates the distribution of the input variables. The plotting
        methods address columns as ``(input_parameter, 0)``.
    """

    def __init__(self, chains):
        self.chains = chains

    def plot_hist(self, input_parameter, chain=None, skip=0, merge=True):
        """Draw a 20-bin histogram of one input parameter.

        Parameters
        ----------
        input_parameter: str
            Name of the input parameter to draw the histogram for.
        chain: int, optional
            Key of the chain to plot. Only read when `merge` is False.
        skip: int
            Number of leading steps to drop from each chain (burn-in removal).
        merge: bool
            When True the values of all chains are pooled into one histogram
            and `chain` is ignored.
        """
        import matplotlib.pyplot as plt
        column = (input_parameter, 0)
        if not merge:
            plt.hist(self.chains[chain][column].iloc[skip:], 20)
            return
        # Drop the first `skip` steps of every chain before pooling them.
        trimmed = [frame[column].iloc[skip:] for frame in self.chains.values()]
        plt.hist(pd.concat(trimmed), 20)

    def plot_chains(self, input_parameter, chain=None):
        """Plot chain traces with the input parameter value on the y axis.

        Parameters
        ----------
        input_parameter: str
            Input parameter name.
        chain: int, optional
            Key of the chain to plot. When omitted every chain is plotted.
        """
        import matplotlib.pyplot as plt
        column = (input_parameter, 0)
        selected = self.chains if chain is None else [chain]
        for chain_key in selected:
            plt.plot(self.chains[chain_key][column])

The analysis results class for MCMC. You will not need to instantiate this class manually.

Parameters
  • chains (dict): A dictionary of pandas DataFrames, each of which corresponds to one MCMC chain. A chain consists of the points that MCMC has visited. From this, a distribution of the input variables can be constructed by means of a simple histogram.
MCMCAnalysisResults(chains)
24    def __init__(self, chains):
25        self.chains = chains
chains
def plot_hist(self, input_parameter, chain=None, skip=0, merge=True):
27    def plot_hist(self, input_parameter, chain=None, skip=0, merge=True):
28        """Will plot a histogram for a given input parameter.
29
30        Parameters
31        ----------
32        input_parameter: str
33            An input parameter name to draw the histogram for.
34        chain: int, optional
35            Index of a chain to be plotted.
36        skip: int
37            How many steps to skip (for getting rid of burn-in).
38        merge: bool
39            If set to True will use all chains to construct the histogram.
40        """
41        import matplotlib.pyplot as plt
42        input_parameter = (input_parameter, 0)
43        if merge:
44            chain_keys = list(self.chains.keys())
45            df = pd.concat([self.chains[ck][input_parameter].iloc[skip:] for ck in chain_keys])
46            plt.hist(df, 20)
47        else:
48            plt.hist(self.chains[chain][input_parameter].iloc[skip:], 20)

Will plot a histogram for a given input parameter.

Parameters
  • input_parameter (str): An input parameter name to draw the histogram for.
  • chain (int, optional): Index of a chain to be plotted.
  • skip (int): How many steps to skip (for getting rid of burn-in).
  • merge (bool): If set to True will use all chains to construct the histogram.
def plot_chains(self, input_parameter, chain=None):
50    def plot_chains(self, input_parameter, chain=None):
51        """Will plot the chains with the input parameter value in the y axis.
52
53        Parameters
54        ----------
55        input_parameter: str
56            Input parameter name.
57        chain: int, optional
58            The chain number of the chain to plot.
59        """
60        import matplotlib.pyplot as plt
61        if chain is None:
62            for chain in self.chains:
63                plt.plot(self.chains[chain][(input_parameter, 0)])
64        else:
65            plt.plot(self.chains[chain][(input_parameter, 0)])

Will plot the chains with the input parameter value in the y axis.

Parameters
  • input_parameter (str): Input parameter name.
  • chain (int, optional): The chain number of the chain to plot.
class MCMCAnalysis(easyvvuq.analysis.base.BaseAnalysisElement):
 68class MCMCAnalysis(BaseAnalysisElement):
 69    """The analysis part of the MCMC method in EasyVVUQ
 70
 71    Parameters
 72    ----------
 73    sampler: MCMCSampler
 74       An instance of MCMCSampler used to generate MCMC samples.
 75    """
 76
 77    def __init__(self, sampler):
 78        self.sampler = sampler
 79
 80    def analyse(self, df):
 81        """Performs some pre-processing on the chains in order to be able to construct
 82        the histograms or other methods of distribution estimation.
 83
 84        Parameters
 85        ----------
 86        df: DataFrame
 87            DataFrame with the results obtained by evaluating the samples generated by the
 88            MCMC sampler.
 89        """
 90        chains = dict([(chain_id, []) for chain_id in df[('chain_id', 0)].unique()])
 91        for chain in chains:
 92            chain_values = df[df[('chain_id', 0)] == chain]
 93            values = chain_values.groupby(('iteration', 0)).apply(lambda x: x.mean())
 94            indexes = values.index.values
 95            for a, b in zip(indexes[:-1], indexes[1:]):
 96                chains[chain] += [values.loc[a][self.sampler.inputs].to_dict()] * (b - a)
 97        for chain in chains:
 98            tmp = dict([(input_, []) for input_ in chains[chain][0]])
 99            for row in chains[chain]:
100                for input_ in chains[chain][0]:
101                    tmp[input_].append(row[input_])
102            chains[chain] = pd.DataFrame(tmp)
103        return MCMCAnalysisResults(chains)

The analysis part of the MCMC method in EasyVVUQ

Parameters
  • sampler (MCMCSampler): An instance of MCMCSampler used to generate MCMC samples.
MCMCAnalysis(sampler)
77    def __init__(self, sampler):
78        self.sampler = sampler
sampler
def analyse(self, df):
 80    def analyse(self, df):
 81        """Performs some pre-processing on the chains in order to be able to construct
 82        the histograms or other methods of distribution estimation.
 83
 84        Parameters
 85        ----------
 86        df: DataFrame
 87            DataFrame with the results obtained by evaluating the samples generated by the
 88            MCMC sampler.
 89        """
 90        chains = dict([(chain_id, []) for chain_id in df[('chain_id', 0)].unique()])
 91        for chain in chains:
 92            chain_values = df[df[('chain_id', 0)] == chain]
 93            values = chain_values.groupby(('iteration', 0)).apply(lambda x: x.mean())
 94            indexes = values.index.values
 95            for a, b in zip(indexes[:-1], indexes[1:]):
 96                chains[chain] += [values.loc[a][self.sampler.inputs].to_dict()] * (b - a)
 97        for chain in chains:
 98            tmp = dict([(input_, []) for input_ in chains[chain][0]])
 99            for row in chains[chain]:
100                for input_ in chains[chain][0]:
101                    tmp[input_].append(row[input_])
102            chains[chain] = pd.DataFrame(tmp)
103        return MCMCAnalysisResults(chains)

Performs some pre-processing on the chains in order to be able to construct the histograms or other methods of distribution estimation.

Parameters
  • df (DataFrame): DataFrame with the results obtained by evaluating the samples generated by the MCMC sampler.