easyvvuq.sampling.mcmc

  1from .base import BaseSamplingElement
  2import numpy as np
  3import os
  4
  5class MCMCSampler(BaseSamplingElement, sampler_name='mcmc_sampler'):
  6    """A Metropolis-Hastings MCMC Sampler.
  7
  8    Parameters
  9    ----------
 10    init: dict
 11       Initial values for each input parameter. Of the form {'input1': value, ...}
 12    q: function
 13       A function of one argument X (dictionary) that returns the proposal distribution conditional on
 14       the `X`.
 15    qoi: str
 16       Name of the quantity of interest
 17    n_chains: int
 18       Number of MCMC chains to run in paralle.
 19    estimator: function
 20       To be used with replica_col argument. Outputs an estimate of some
 21       parameter when given a sample array.
 22    """
 23
 24    def __init__(self, init, q, qoi, n_chains=1, likelihood=lambda x: x[0], estimator=None):
 25        for param in init:
 26            if not hasattr(init[param], '__iter__'):
 27                raise RuntimeError(
 28                    'all input intializations should be iterables of same length as there are chains')
 29            if len(init[param]) != n_chains:
 30                raise RuntimeError(
 31                    'initialization dictionary should have separate values for each chain')
 32        self.init = dict(init)
 33        self.inputs = list(self.init.keys())
 34        for input_ in self.inputs:
 35            if len(self.init[input_]) != n_chains:
 36                raise RuntimeError("The init dictionary must contains the same number \
 37                                    of values for each input as there are chains.")
 38        self.n_chains = n_chains
 39        self.x = []
 40        self.q = q
 41        self.qoi = qoi
 42        self.current_chain = 0
 43        for chain in range(self.n_chains):
 44            self.x.append(dict([(key, self.init[key][chain]) for key in self.inputs]))
 45            self.x[chain]['chain_id'] = chain
 46        self.f_x = [None] * n_chains
 47        self.stop = False
 48        self.likelihood = lambda x: np.exp(likelihood(x))
 49        self.n_replicas = None
 50        self.estimator = estimator
 51        self.acceptance_ratios = []
 52        self.iteration = 0
 53
 54    def is_finite(self):
 55        return True
 56
 57    def n_samples(self):
 58        return self.n_chains
 59
 60    def __iter__(self):
 61        self.current_chain = 0
 62        return self
 63
 64    def __next__(self):
 65        """Returns next MCMC sample.
 66
 67        Returns
 68        -------
 69        dict
 70           A dictionary where keys are input variables names and values are input values.
 71        """
 72        if self.stop:
 73            raise StopIteration
 74        if self.f_x[self.current_chain] is None:
 75            try:
 76                return self.x[self.current_chain]
 77            finally:
 78                self.current_chain = (self.current_chain + 1) % self.n_chains
 79                if self.current_chain == 0:
 80                    self.stop = True
 81        y = {}
 82        y_ = self.q(self.x[self.current_chain]).sample()
 83        for i, key in enumerate(self.inputs):
 84            y[key] = y_[i][0]
 85        y['chain_id'] = self.current_chain
 86        self.current_chain = (self.current_chain + 1) % self.n_chains
 87        if self.current_chain == 0:
 88            self.stop = True
 89        return y
 90
 91    def update(self, result, invalid):
 92        """Performs the MCMC sampling procedure on the campaign.
 93
 94        Parameters
 95        ----------
 96        result: pandas DataFrame
 97            run information from previous iteration (same as collation DataFrame)
 98        invalid: pandas DataFrame
 99            invalid run information (runs that cannot be executed for some reason)
100
101        Returns
102        -------
103        list of rejected run ids
104        """
105        self.stop = False
106        if (self.estimator is not None) and (len(result) > 0):
107            result_grouped = result.groupby(('chain_id', 0)).apply(self.estimator)
108        else:
109            result_grouped = result
110        if (self.estimator is not None) and (len(invalid) > 0):
111            invalid_grouped = invalid.groupby(('chain_id', 0)).apply(lambda x: x.mean())
112        else:
113            invalid_grouped = invalid
114        ignored_chains = []
115        ignored_runs = []
116        # process normal runs
117        for row in result_grouped.iterrows():
118            row = row[1]
119            chain_id = int(row['chain_id'].values[0])
120            y = dict([(key, row[key][0]) for key in self.inputs])
121            if self.f_x[chain_id] is None:
122                self.f_x[chain_id] = self.likelihood(row[self.qoi].values)
123            else:
124                f_y = self.likelihood(row[self.qoi].values)
125                q_xy = self.q(y).pdf([self.x[chain_id][key] for key in self.inputs])
126                q_yx = self.q(self.x[chain_id]).pdf([y[key] for key in self.inputs])
127                if self.f_x[chain_id] == 0.0:
128                    r = 1.0
129                else:
130                    r = min(1.0, (f_y / self.f_x[chain_id]) * (q_xy / q_yx))
131                if np.random.random() < r:
132                    self.x[chain_id] = dict(y)
133                    self.f_x[chain_id] = f_y
134                else:
135                    ignored_chains.append(chain_id)
136        for row in invalid_grouped.iterrows():
137            row = row[1]
138            chain_id = int(row['chain_id'].values[0])
139            ignored_chains.append(chain_id)
140        for chain_id in ignored_chains:
141            try:
142                ignored_runs += list(result.loc[result[('chain_id', 0)]
143                                                == chain_id]['run_id'].values)
144            except KeyError:
145                if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored')
146            try:
147                ignored_runs += list(invalid.loc[invalid[('chain_id', 0)]
148                                                 == chain_id]['run_id'].values)
149            except KeyError:
150                if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored')
151        ignored_runs = [run[0] for run in ignored_runs]
152        self.iteration += 1
153        return ignored_runs
154
155    @property
156    def analysis_class(self):
157        """Returns a corresponding analysis class for this sampler.
158
159        Returns
160        -------
161        class
162        """
163        from easyvvuq.analysis import MCMCAnalysis
164        return MCMCAnalysis
class MCMCSampler(easyvvuq.sampling.base.BaseSamplingElement):
  6class MCMCSampler(BaseSamplingElement, sampler_name='mcmc_sampler'):
  7    """A Metropolis-Hastings MCMC Sampler.
  8
  9    Parameters
 10    ----------
 11    init: dict
 12       Initial values for each input parameter. Of the form {'input1': value, ...}
 13    q: function
 14       A function of one argument X (dictionary) that returns the proposal distribution conditional on
 15       the `X`.
 16    qoi: str
 17       Name of the quantity of interest
 18    n_chains: int
 19       Number of MCMC chains to run in paralle.
 20    estimator: function
 21       To be used with replica_col argument. Outputs an estimate of some
 22       parameter when given a sample array.
 23    """
 24
 25    def __init__(self, init, q, qoi, n_chains=1, likelihood=lambda x: x[0], estimator=None):
 26        for param in init:
 27            if not hasattr(init[param], '__iter__'):
 28                raise RuntimeError(
 29                    'all input intializations should be iterables of same length as there are chains')
 30            if len(init[param]) != n_chains:
 31                raise RuntimeError(
 32                    'initialization dictionary should have separate values for each chain')
 33        self.init = dict(init)
 34        self.inputs = list(self.init.keys())
 35        for input_ in self.inputs:
 36            if len(self.init[input_]) != n_chains:
 37                raise RuntimeError("The init dictionary must contains the same number \
 38                                    of values for each input as there are chains.")
 39        self.n_chains = n_chains
 40        self.x = []
 41        self.q = q
 42        self.qoi = qoi
 43        self.current_chain = 0
 44        for chain in range(self.n_chains):
 45            self.x.append(dict([(key, self.init[key][chain]) for key in self.inputs]))
 46            self.x[chain]['chain_id'] = chain
 47        self.f_x = [None] * n_chains
 48        self.stop = False
 49        self.likelihood = lambda x: np.exp(likelihood(x))
 50        self.n_replicas = None
 51        self.estimator = estimator
 52        self.acceptance_ratios = []
 53        self.iteration = 0
 54
 55    def is_finite(self):
 56        return True
 57
 58    def n_samples(self):
 59        return self.n_chains
 60
 61    def __iter__(self):
 62        self.current_chain = 0
 63        return self
 64
 65    def __next__(self):
 66        """Returns next MCMC sample.
 67
 68        Returns
 69        -------
 70        dict
 71           A dictionary where keys are input variables names and values are input values.
 72        """
 73        if self.stop:
 74            raise StopIteration
 75        if self.f_x[self.current_chain] is None:
 76            try:
 77                return self.x[self.current_chain]
 78            finally:
 79                self.current_chain = (self.current_chain + 1) % self.n_chains
 80                if self.current_chain == 0:
 81                    self.stop = True
 82        y = {}
 83        y_ = self.q(self.x[self.current_chain]).sample()
 84        for i, key in enumerate(self.inputs):
 85            y[key] = y_[i][0]
 86        y['chain_id'] = self.current_chain
 87        self.current_chain = (self.current_chain + 1) % self.n_chains
 88        if self.current_chain == 0:
 89            self.stop = True
 90        return y
 91
 92    def update(self, result, invalid):
 93        """Performs the MCMC sampling procedure on the campaign.
 94
 95        Parameters
 96        ----------
 97        result: pandas DataFrame
 98            run information from previous iteration (same as collation DataFrame)
 99        invalid: pandas DataFrame
100            invalid run information (runs that cannot be executed for some reason)
101
102        Returns
103        -------
104        list of rejected run ids
105        """
106        self.stop = False
107        if (self.estimator is not None) and (len(result) > 0):
108            result_grouped = result.groupby(('chain_id', 0)).apply(self.estimator)
109        else:
110            result_grouped = result
111        if (self.estimator is not None) and (len(invalid) > 0):
112            invalid_grouped = invalid.groupby(('chain_id', 0)).apply(lambda x: x.mean())
113        else:
114            invalid_grouped = invalid
115        ignored_chains = []
116        ignored_runs = []
117        # process normal runs
118        for row in result_grouped.iterrows():
119            row = row[1]
120            chain_id = int(row['chain_id'].values[0])
121            y = dict([(key, row[key][0]) for key in self.inputs])
122            if self.f_x[chain_id] is None:
123                self.f_x[chain_id] = self.likelihood(row[self.qoi].values)
124            else:
125                f_y = self.likelihood(row[self.qoi].values)
126                q_xy = self.q(y).pdf([self.x[chain_id][key] for key in self.inputs])
127                q_yx = self.q(self.x[chain_id]).pdf([y[key] for key in self.inputs])
128                if self.f_x[chain_id] == 0.0:
129                    r = 1.0
130                else:
131                    r = min(1.0, (f_y / self.f_x[chain_id]) * (q_xy / q_yx))
132                if np.random.random() < r:
133                    self.x[chain_id] = dict(y)
134                    self.f_x[chain_id] = f_y
135                else:
136                    ignored_chains.append(chain_id)
137        for row in invalid_grouped.iterrows():
138            row = row[1]
139            chain_id = int(row['chain_id'].values[0])
140            ignored_chains.append(chain_id)
141        for chain_id in ignored_chains:
142            try:
143                ignored_runs += list(result.loc[result[('chain_id', 0)]
144                                                == chain_id]['run_id'].values)
145            except KeyError:
146                if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored')
147            try:
148                ignored_runs += list(invalid.loc[invalid[('chain_id', 0)]
149                                                 == chain_id]['run_id'].values)
150            except KeyError:
151                if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored')
152        ignored_runs = [run[0] for run in ignored_runs]
153        self.iteration += 1
154        return ignored_runs
155
156    @property
157    def analysis_class(self):
158        """Returns a corresponding analysis class for this sampler.
159
160        Returns
161        -------
162        class
163        """
164        from easyvvuq.analysis import MCMCAnalysis
165        return MCMCAnalysis

A Metropolis-Hastings MCMC Sampler.

Parameters
  • init (dict): Initial values for each input parameter. Of the form {'input1': value, ...}
  • q (function): A function of one argument X (dictionary) that returns the proposal distribution conditional on the X.
  • qoi (str): Name of the quantity of interest
  • n_chains (int): Number of MCMC chains to run in paralle.
  • estimator (function): To be used with replica_col argument. Outputs an estimate of some parameter when given a sample array.
MCMCSampler( init, q, qoi, n_chains=1, likelihood=<function MCMCSampler.<lambda>>, estimator=None)
25    def __init__(self, init, q, qoi, n_chains=1, likelihood=lambda x: x[0], estimator=None):
26        for param in init:
27            if not hasattr(init[param], '__iter__'):
28                raise RuntimeError(
29                    'all input intializations should be iterables of same length as there are chains')
30            if len(init[param]) != n_chains:
31                raise RuntimeError(
32                    'initialization dictionary should have separate values for each chain')
33        self.init = dict(init)
34        self.inputs = list(self.init.keys())
35        for input_ in self.inputs:
36            if len(self.init[input_]) != n_chains:
37                raise RuntimeError("The init dictionary must contains the same number \
38                                    of values for each input as there are chains.")
39        self.n_chains = n_chains
40        self.x = []
41        self.q = q
42        self.qoi = qoi
43        self.current_chain = 0
44        for chain in range(self.n_chains):
45            self.x.append(dict([(key, self.init[key][chain]) for key in self.inputs]))
46            self.x[chain]['chain_id'] = chain
47        self.f_x = [None] * n_chains
48        self.stop = False
49        self.likelihood = lambda x: np.exp(likelihood(x))
50        self.n_replicas = None
51        self.estimator = estimator
52        self.acceptance_ratios = []
53        self.iteration = 0
init
inputs
n_chains
x
q
qoi
current_chain
f_x
stop
likelihood
n_replicas
estimator
acceptance_ratios
iteration = 0
def is_finite(self):
55    def is_finite(self):
56        return True
def n_samples(self):
58    def n_samples(self):
59        return self.n_chains
def update(self, result, invalid):
 92    def update(self, result, invalid):
 93        """Performs the MCMC sampling procedure on the campaign.
 94
 95        Parameters
 96        ----------
 97        result: pandas DataFrame
 98            run information from previous iteration (same as collation DataFrame)
 99        invalid: pandas DataFrame
100            invalid run information (runs that cannot be executed for some reason)
101
102        Returns
103        -------
104        list of rejected run ids
105        """
106        self.stop = False
107        if (self.estimator is not None) and (len(result) > 0):
108            result_grouped = result.groupby(('chain_id', 0)).apply(self.estimator)
109        else:
110            result_grouped = result
111        if (self.estimator is not None) and (len(invalid) > 0):
112            invalid_grouped = invalid.groupby(('chain_id', 0)).apply(lambda x: x.mean())
113        else:
114            invalid_grouped = invalid
115        ignored_chains = []
116        ignored_runs = []
117        # process normal runs
118        for row in result_grouped.iterrows():
119            row = row[1]
120            chain_id = int(row['chain_id'].values[0])
121            y = dict([(key, row[key][0]) for key in self.inputs])
122            if self.f_x[chain_id] is None:
123                self.f_x[chain_id] = self.likelihood(row[self.qoi].values)
124            else:
125                f_y = self.likelihood(row[self.qoi].values)
126                q_xy = self.q(y).pdf([self.x[chain_id][key] for key in self.inputs])
127                q_yx = self.q(self.x[chain_id]).pdf([y[key] for key in self.inputs])
128                if self.f_x[chain_id] == 0.0:
129                    r = 1.0
130                else:
131                    r = min(1.0, (f_y / self.f_x[chain_id]) * (q_xy / q_yx))
132                if np.random.random() < r:
133                    self.x[chain_id] = dict(y)
134                    self.f_x[chain_id] = f_y
135                else:
136                    ignored_chains.append(chain_id)
137        for row in invalid_grouped.iterrows():
138            row = row[1]
139            chain_id = int(row['chain_id'].values[0])
140            ignored_chains.append(chain_id)
141        for chain_id in ignored_chains:
142            try:
143                ignored_runs += list(result.loc[result[('chain_id', 0)]
144                                                == chain_id]['run_id'].values)
145            except KeyError:
146                if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored')
147            try:
148                ignored_runs += list(invalid.loc[invalid[('chain_id', 0)]
149                                                 == chain_id]['run_id'].values)
150            except KeyError:
151                if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored')
152        ignored_runs = [run[0] for run in ignored_runs]
153        self.iteration += 1
154        return ignored_runs

Performs the MCMC sampling procedure on the campaign.

Parameters
  • result (pandas DataFrame): run information from previous iteration (same as collation DataFrame)
  • invalid (pandas DataFrame): invalid run information (runs that cannot be executed for some reason)
Returns
  • list of rejected run ids
analysis_class
156    @property
157    def analysis_class(self):
158        """Returns a corresponding analysis class for this sampler.
159
160        Returns
161        -------
162        class
163        """
164        from easyvvuq.analysis import MCMCAnalysis
165        return MCMCAnalysis

Returns a corresponding analysis class for this sampler.

Returns
  • class
sampler_name = 'mcmc_sampler'