easyvvuq.sampling.mcmc
1from .base import BaseSamplingElement 2import numpy as np 3import os 4 5class MCMCSampler(BaseSamplingElement, sampler_name='mcmc_sampler'): 6 """A Metropolis-Hastings MCMC Sampler. 7 8 Parameters 9 ---------- 10 init: dict 11 Initial values for each input parameter. Of the form {'input1': value, ...} 12 q: function 13 A function of one argument X (dictionary) that returns the proposal distribution conditional on 14 the `X`. 15 qoi: str 16 Name of the quantity of interest 17 n_chains: int 18 Number of MCMC chains to run in paralle. 19 estimator: function 20 To be used with replica_col argument. Outputs an estimate of some 21 parameter when given a sample array. 22 """ 23 24 def __init__(self, init, q, qoi, n_chains=1, likelihood=lambda x: x[0], estimator=None): 25 for param in init: 26 if not hasattr(init[param], '__iter__'): 27 raise RuntimeError( 28 'all input intializations should be iterables of same length as there are chains') 29 if len(init[param]) != n_chains: 30 raise RuntimeError( 31 'initialization dictionary should have separate values for each chain') 32 self.init = dict(init) 33 self.inputs = list(self.init.keys()) 34 for input_ in self.inputs: 35 if len(self.init[input_]) != n_chains: 36 raise RuntimeError("The init dictionary must contains the same number \ 37 of values for each input as there are chains.") 38 self.n_chains = n_chains 39 self.x = [] 40 self.q = q 41 self.qoi = qoi 42 self.current_chain = 0 43 for chain in range(self.n_chains): 44 self.x.append(dict([(key, self.init[key][chain]) for key in self.inputs])) 45 self.x[chain]['chain_id'] = chain 46 self.f_x = [None] * n_chains 47 self.stop = False 48 self.likelihood = lambda x: np.exp(likelihood(x)) 49 self.n_replicas = None 50 self.estimator = estimator 51 self.acceptance_ratios = [] 52 self.iteration = 0 53 54 def is_finite(self): 55 return True 56 57 def n_samples(self): 58 return self.n_chains 59 60 def __iter__(self): 61 self.current_chain = 0 62 return self 63 64 def __next__(self): 65 """Returns next MCMC sample. 66 67 Returns 68 ------- 69 dict 70 A dictionary where keys are input variables names and values are input values. 71 """ 72 if self.stop: 73 raise StopIteration 74 if self.f_x[self.current_chain] is None: 75 try: 76 return self.x[self.current_chain] 77 finally: 78 self.current_chain = (self.current_chain + 1) % self.n_chains 79 if self.current_chain == 0: 80 self.stop = True 81 y = {} 82 y_ = self.q(self.x[self.current_chain]).sample() 83 for i, key in enumerate(self.inputs): 84 y[key] = y_[i][0] 85 y['chain_id'] = self.current_chain 86 self.current_chain = (self.current_chain + 1) % self.n_chains 87 if self.current_chain == 0: 88 self.stop = True 89 return y 90 91 def update(self, result, invalid): 92 """Performs the MCMC sampling procedure on the campaign. 93 94 Parameters 95 ---------- 96 result: pandas DataFrame 97 run information from previous iteration (same as collation DataFrame) 98 invalid: pandas DataFrame 99 invalid run information (runs that cannot be executed for some reason) 100 101 Returns 102 ------- 103 list of rejected run ids 104 """ 105 self.stop = False 106 if (self.estimator is not None) and (len(result) > 0): 107 result_grouped = result.groupby(('chain_id', 0)).apply(self.estimator) 108 else: 109 result_grouped = result 110 if (self.estimator is not None) and (len(invalid) > 0): 111 invalid_grouped = invalid.groupby(('chain_id', 0)).apply(lambda x: x.mean()) 112 else: 113 invalid_grouped = invalid 114 ignored_chains = [] 115 ignored_runs = [] 116 # process normal runs 117 for row in result_grouped.iterrows(): 118 row = row[1] 119 chain_id = int(row['chain_id'].values[0]) 120 y = dict([(key, row[key][0]) for key in self.inputs]) 121 if self.f_x[chain_id] is None: 122 self.f_x[chain_id] = self.likelihood(row[self.qoi].values) 123 else: 124 f_y = self.likelihood(row[self.qoi].values) 125 q_xy = self.q(y).pdf([self.x[chain_id][key] for key in self.inputs]) 126 q_yx = self.q(self.x[chain_id]).pdf([y[key] for key in self.inputs]) 127 if self.f_x[chain_id] == 0.0: 128 r = 1.0 129 else: 130 r = min(1.0, (f_y / self.f_x[chain_id]) * (q_xy / q_yx)) 131 if np.random.random() < r: 132 self.x[chain_id] = dict(y) 133 self.f_x[chain_id] = f_y 134 else: 135 ignored_chains.append(chain_id) 136 for row in invalid_grouped.iterrows(): 137 row = row[1] 138 chain_id = int(row['chain_id'].values[0]) 139 ignored_chains.append(chain_id) 140 for chain_id in ignored_chains: 141 try: 142 ignored_runs += list(result.loc[result[('chain_id', 0)] 143 == chain_id]['run_id'].values) 144 except KeyError: 145 if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored') 146 try: 147 ignored_runs += list(invalid.loc[invalid[('chain_id', 0)] 148 == chain_id]['run_id'].values) 149 except KeyError: 150 if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored') 151 ignored_runs = [run[0] for run in ignored_runs] 152 self.iteration += 1 153 return ignored_runs 154 155 @property 156 def analysis_class(self): 157 """Returns a corresponding analysis class for this sampler. 158 159 Returns 160 ------- 161 class 162 """ 163 from easyvvuq.analysis import MCMCAnalysis 164 return MCMCAnalysis
6class MCMCSampler(BaseSamplingElement, sampler_name='mcmc_sampler'): 7 """A Metropolis-Hastings MCMC Sampler. 8 9 Parameters 10 ---------- 11 init: dict 12 Initial values for each input parameter. Of the form {'input1': value, ...} 13 q: function 14 A function of one argument X (dictionary) that returns the proposal distribution conditional on 15 the `X`. 16 qoi: str 17 Name of the quantity of interest 18 n_chains: int 19 Number of MCMC chains to run in paralle. 20 estimator: function 21 To be used with replica_col argument. Outputs an estimate of some 22 parameter when given a sample array. 23 """ 24 25 def __init__(self, init, q, qoi, n_chains=1, likelihood=lambda x: x[0], estimator=None): 26 for param in init: 27 if not hasattr(init[param], '__iter__'): 28 raise RuntimeError( 29 'all input intializations should be iterables of same length as there are chains') 30 if len(init[param]) != n_chains: 31 raise RuntimeError( 32 'initialization dictionary should have separate values for each chain') 33 self.init = dict(init) 34 self.inputs = list(self.init.keys()) 35 for input_ in self.inputs: 36 if len(self.init[input_]) != n_chains: 37 raise RuntimeError("The init dictionary must contains the same number \ 38 of values for each input as there are chains.") 39 self.n_chains = n_chains 40 self.x = [] 41 self.q = q 42 self.qoi = qoi 43 self.current_chain = 0 44 for chain in range(self.n_chains): 45 self.x.append(dict([(key, self.init[key][chain]) for key in self.inputs])) 46 self.x[chain]['chain_id'] = chain 47 self.f_x = [None] * n_chains 48 self.stop = False 49 self.likelihood = lambda x: np.exp(likelihood(x)) 50 self.n_replicas = None 51 self.estimator = estimator 52 self.acceptance_ratios = [] 53 self.iteration = 0 54 55 def is_finite(self): 56 return True 57 58 def n_samples(self): 59 return self.n_chains 60 61 def __iter__(self): 62 self.current_chain = 0 63 return self 64 65 def __next__(self): 66 """Returns next MCMC sample. 67 68 Returns 69 ------- 70 dict 71 A dictionary where keys are input variables names and values are input values. 72 """ 73 if self.stop: 74 raise StopIteration 75 if self.f_x[self.current_chain] is None: 76 try: 77 return self.x[self.current_chain] 78 finally: 79 self.current_chain = (self.current_chain + 1) % self.n_chains 80 if self.current_chain == 0: 81 self.stop = True 82 y = {} 83 y_ = self.q(self.x[self.current_chain]).sample() 84 for i, key in enumerate(self.inputs): 85 y[key] = y_[i][0] 86 y['chain_id'] = self.current_chain 87 self.current_chain = (self.current_chain + 1) % self.n_chains 88 if self.current_chain == 0: 89 self.stop = True 90 return y 91 92 def update(self, result, invalid): 93 """Performs the MCMC sampling procedure on the campaign. 94 95 Parameters 96 ---------- 97 result: pandas DataFrame 98 run information from previous iteration (same as collation DataFrame) 99 invalid: pandas DataFrame 100 invalid run information (runs that cannot be executed for some reason) 101 102 Returns 103 ------- 104 list of rejected run ids 105 """ 106 self.stop = False 107 if (self.estimator is not None) and (len(result) > 0): 108 result_grouped = result.groupby(('chain_id', 0)).apply(self.estimator) 109 else: 110 result_grouped = result 111 if (self.estimator is not None) and (len(invalid) > 0): 112 invalid_grouped = invalid.groupby(('chain_id', 0)).apply(lambda x: x.mean()) 113 else: 114 invalid_grouped = invalid 115 ignored_chains = [] 116 ignored_runs = [] 117 # process normal runs 118 for row in result_grouped.iterrows(): 119 row = row[1] 120 chain_id = int(row['chain_id'].values[0]) 121 y = dict([(key, row[key][0]) for key in self.inputs]) 122 if self.f_x[chain_id] is None: 123 self.f_x[chain_id] = self.likelihood(row[self.qoi].values) 124 else: 125 f_y = self.likelihood(row[self.qoi].values) 126 q_xy = self.q(y).pdf([self.x[chain_id][key] for key in self.inputs]) 127 q_yx = self.q(self.x[chain_id]).pdf([y[key] for key in self.inputs]) 128 if self.f_x[chain_id] == 0.0: 129 r = 1.0 130 else: 131 r = min(1.0, (f_y / self.f_x[chain_id]) * (q_xy / q_yx)) 132 if np.random.random() < r: 133 self.x[chain_id] = dict(y) 134 self.f_x[chain_id] = f_y 135 else: 136 ignored_chains.append(chain_id) 137 for row in invalid_grouped.iterrows(): 138 row = row[1] 139 chain_id = int(row['chain_id'].values[0]) 140 ignored_chains.append(chain_id) 141 for chain_id in ignored_chains: 142 try: 143 ignored_runs += list(result.loc[result[('chain_id', 0)] 144 == chain_id]['run_id'].values) 145 except KeyError: 146 if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored') 147 try: 148 ignored_runs += list(invalid.loc[invalid[('chain_id', 0)] 149 == chain_id]['run_id'].values) 150 except KeyError: 151 if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored') 152 ignored_runs = [run[0] for run in ignored_runs] 153 self.iteration += 1 154 return ignored_runs 155 156 @property 157 def analysis_class(self): 158 """Returns a corresponding analysis class for this sampler. 159 160 Returns 161 ------- 162 class 163 """ 164 from easyvvuq.analysis import MCMCAnalysis 165 return MCMCAnalysis
A Metropolis-Hastings MCMC Sampler.
Parameters
- init (dict): Initial values for each input parameter. Of the form {'input1': value, ...}
- q (function):
A function of one argument X (dictionary) that returns the proposal distribution conditional on
the
X. - qoi (str): Name of the quantity of interest
- n_chains (int): Number of MCMC chains to run in paralle.
- estimator (function): To be used with replica_col argument. Outputs an estimate of some parameter when given a sample array.
MCMCSampler( init, q, qoi, n_chains=1, likelihood=<function MCMCSampler.<lambda>>, estimator=None)
25 def __init__(self, init, q, qoi, n_chains=1, likelihood=lambda x: x[0], estimator=None): 26 for param in init: 27 if not hasattr(init[param], '__iter__'): 28 raise RuntimeError( 29 'all input intializations should be iterables of same length as there are chains') 30 if len(init[param]) != n_chains: 31 raise RuntimeError( 32 'initialization dictionary should have separate values for each chain') 33 self.init = dict(init) 34 self.inputs = list(self.init.keys()) 35 for input_ in self.inputs: 36 if len(self.init[input_]) != n_chains: 37 raise RuntimeError("The init dictionary must contains the same number \ 38 of values for each input as there are chains.") 39 self.n_chains = n_chains 40 self.x = [] 41 self.q = q 42 self.qoi = qoi 43 self.current_chain = 0 44 for chain in range(self.n_chains): 45 self.x.append(dict([(key, self.init[key][chain]) for key in self.inputs])) 46 self.x[chain]['chain_id'] = chain 47 self.f_x = [None] * n_chains 48 self.stop = False 49 self.likelihood = lambda x: np.exp(likelihood(x)) 50 self.n_replicas = None 51 self.estimator = estimator 52 self.acceptance_ratios = [] 53 self.iteration = 0
def
update(self, result, invalid):
92 def update(self, result, invalid): 93 """Performs the MCMC sampling procedure on the campaign. 94 95 Parameters 96 ---------- 97 result: pandas DataFrame 98 run information from previous iteration (same as collation DataFrame) 99 invalid: pandas DataFrame 100 invalid run information (runs that cannot be executed for some reason) 101 102 Returns 103 ------- 104 list of rejected run ids 105 """ 106 self.stop = False 107 if (self.estimator is not None) and (len(result) > 0): 108 result_grouped = result.groupby(('chain_id', 0)).apply(self.estimator) 109 else: 110 result_grouped = result 111 if (self.estimator is not None) and (len(invalid) > 0): 112 invalid_grouped = invalid.groupby(('chain_id', 0)).apply(lambda x: x.mean()) 113 else: 114 invalid_grouped = invalid 115 ignored_chains = [] 116 ignored_runs = [] 117 # process normal runs 118 for row in result_grouped.iterrows(): 119 row = row[1] 120 chain_id = int(row['chain_id'].values[0]) 121 y = dict([(key, row[key][0]) for key in self.inputs]) 122 if self.f_x[chain_id] is None: 123 self.f_x[chain_id] = self.likelihood(row[self.qoi].values) 124 else: 125 f_y = self.likelihood(row[self.qoi].values) 126 q_xy = self.q(y).pdf([self.x[chain_id][key] for key in self.inputs]) 127 q_yx = self.q(self.x[chain_id]).pdf([y[key] for key in self.inputs]) 128 if self.f_x[chain_id] == 0.0: 129 r = 1.0 130 else: 131 r = min(1.0, (f_y / self.f_x[chain_id]) * (q_xy / q_yx)) 132 if np.random.random() < r: 133 self.x[chain_id] = dict(y) 134 self.f_x[chain_id] = f_y 135 else: 136 ignored_chains.append(chain_id) 137 for row in invalid_grouped.iterrows(): 138 row = row[1] 139 chain_id = int(row['chain_id'].values[0]) 140 ignored_chains.append(chain_id) 141 for chain_id in ignored_chains: 142 try: 143 ignored_runs += list(result.loc[result[('chain_id', 0)] 144 == chain_id]['run_id'].values) 145 except KeyError: 146 if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored') 147 try: 148 ignored_runs += list(invalid.loc[invalid[('chain_id', 0)] 149 == chain_id]['run_id'].values) 150 except KeyError: 151 if os.getenv("EasyVVUQ_Debug"): print('KeyError raised and ignored') 152 ignored_runs = [run[0] for run in ignored_runs] 153 self.iteration += 1 154 return ignored_runs
Performs the MCMC sampling procedure on the campaign.
Parameters
- result (pandas DataFrame): run information from previous iteration (same as collation DataFrame)
- invalid (pandas DataFrame): invalid run information (runs that cannot be executed for some reason)
Returns
- list of rejected run ids
analysis_class
156 @property 157 def analysis_class(self): 158 """Returns a corresponding analysis class for this sampler. 159 160 Returns 161 ------- 162 class 163 """ 164 from easyvvuq.analysis import MCMCAnalysis 165 return MCMCAnalysis
Returns a corresponding analysis class for this sampler.
Returns
- class