easyvvuq.decoders.hdf5
A Decoder for HDF5 format files.
"""A Decoder for HDF5 format files.
"""

import os
import logging
import h5py
from easyvvuq import OutputType

__copyright__ = """

    Copyright 2018 Robin A. Richardson, David W. Wright

    This file is part of EasyVVUQ

    EasyVVUQ is free software: you can redistribute it and/or modify
    it under the terms of the Lesser GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EasyVVUQ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    Lesser GNU General Public License for more details.

    You should have received a copy of the Lesser GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
__license__ = "LGPL"


# NOTE(review): instantiating `logging.Logger` directly creates a logger that
# is not registered in the logging hierarchy; `logging.getLogger(__name__)` is
# the usual form. Left unchanged here - confirm before switching.
logger = logging.Logger(__name__)


class HDF5:
    """HDF5 Decoder.

    Reads the selected datasets from an HDF5 file located in a run directory
    and returns them as flat Python lists keyed by dataset name.

    Parameters
    ----------
    target_filename: str
        Filename of an HDF5 file to decode.
    output_columns: list
        A list of column names that will be selected to appear in the output.

    Raises
    ------
    RuntimeError
        If `output_columns` is empty.
    """

    def __init__(self, target_filename, output_columns):
        if len(output_columns) == 0:
            msg = "output_columns cannot be empty."
            logger.error(msg)
            raise RuntimeError(msg)
        self.target_filename = target_filename
        self.output_columns = output_columns
        self.output_type = OutputType('sample')

    @staticmethod
    def _get_output_path(run_info=None, outfile=None):
        """Builds the path of the output file from the `run_dir` entry of
        `run_info` and the given file name.

        Parameters
        ----------
        run_info: dict
            Run info as retrieved from the database. Must contain `run_dir`.
        outfile: str
            Filename of the file to be parsed.

        Returns
        -------
        str
            `run_dir` joined with `outfile` (absolute only if `run_dir` is).

        Raises
        ------
        RuntimeError
            If the run directory does not exist.
        """
        run_path = run_info['run_dir']
        if not os.path.isdir(run_path):
            raise RuntimeError(f"Run directory does not exist: {run_path}")
        return os.path.join(run_path, outfile)

    def parse_sim_output(self, run_info=None):
        """Parses the HDF5 file and converts it to the EasyVVUQ internal
        dictionary based format. Each requested column is read in full,
        flattened to one dimension and stored as a list, so it appears as a
        vector QoI in the output dictionary.

        For example if the file contains the datasets

            a = [1, 3]
            b = [2, 4]

        and both `a` and `b` are specified as `output_columns` the output
        will look as follows: {'a': [1, 3], 'b': [2, 4]}.

        Parameters
        ----------
        run_info: dict
            Information about the run (used to construct the path to the
            HDF5 file that needs decoding). Must contain `run_dir`.

        Returns
        -------
        dict
            Maps each name in `output_columns` to a flat list of values.

        Raises
        ------
        RuntimeError
            If the run directory does not exist or a requested column is
            missing from the file.
        """
        # A fresh dict per call replaces the former shared mutable default;
        # a missing `run_dir` still surfaces as a KeyError, as before.
        if run_info is None:
            run_info = {}
        out_path = self._get_output_path(run_info, self.target_filename)
        results = {}

        with h5py.File(out_path, 'r') as h5f:
            for column in self.output_columns:
                try:
                    # TODO: this will always flatten, but HDF5 could handle
                    # 2D or 3D arrays as well. Will probably break the analysis
                    # classes though, but maybe something to incorporate later.
                    results[column] = h5f[column][()].flatten().tolist()
                except KeyError as err:
                    # Chain the original lookup error to keep the traceback.
                    raise RuntimeError(
                        'column not found in the hdf5 file: {}'.format(column)) from err

        return results
logger =
<Logger easyvvuq.decoders.hdf5 (NOTSET)>
class
HDF5:
class HDF5:
    """HDF5 Decoder.

    Reads the selected datasets from an HDF5 file located in a run directory
    and returns them as flat Python lists keyed by dataset name.

    Parameters
    ----------
    target_filename: str
        Filename of an HDF5 file to decode.
    output_columns: list
        A list of column names that will be selected to appear in the output.

    Raises
    ------
    RuntimeError
        If `output_columns` is empty.
    """

    def __init__(self, target_filename, output_columns):
        if len(output_columns) == 0:
            msg = "output_columns cannot be empty."
            logger.error(msg)
            raise RuntimeError(msg)
        self.target_filename = target_filename
        self.output_columns = output_columns
        self.output_type = OutputType('sample')

    @staticmethod
    def _get_output_path(run_info=None, outfile=None):
        """Builds the path of the output file from the `run_dir` entry of
        `run_info` and the given file name.

        Parameters
        ----------
        run_info: dict
            Run info as retrieved from the database. Must contain `run_dir`.
        outfile: str
            Filename of the file to be parsed.

        Returns
        -------
        str
            `run_dir` joined with `outfile` (absolute only if `run_dir` is).

        Raises
        ------
        RuntimeError
            If the run directory does not exist.
        """
        run_path = run_info['run_dir']
        if not os.path.isdir(run_path):
            raise RuntimeError(f"Run directory does not exist: {run_path}")
        return os.path.join(run_path, outfile)

    def parse_sim_output(self, run_info=None):
        """Parses the HDF5 file and converts it to the EasyVVUQ internal
        dictionary based format. Each requested column is read in full,
        flattened to one dimension and stored as a list, so it appears as a
        vector QoI in the output dictionary.

        For example if the file contains the datasets

            a = [1, 3]
            b = [2, 4]

        and both `a` and `b` are specified as `output_columns` the output
        will look as follows: {'a': [1, 3], 'b': [2, 4]}.

        Parameters
        ----------
        run_info: dict
            Information about the run (used to construct the path to the
            HDF5 file that needs decoding). Must contain `run_dir`.

        Returns
        -------
        dict
            Maps each name in `output_columns` to a flat list of values.

        Raises
        ------
        RuntimeError
            If the run directory does not exist or a requested column is
            missing from the file.
        """
        # A fresh dict per call replaces the former shared mutable default;
        # a missing `run_dir` still surfaces as a KeyError, as before.
        if run_info is None:
            run_info = {}
        out_path = self._get_output_path(run_info, self.target_filename)
        results = {}

        with h5py.File(out_path, 'r') as h5f:
            for column in self.output_columns:
                try:
                    # TODO: this will always flatten, but HDF5 could handle
                    # 2D or 3D arrays as well. Will probably break the analysis
                    # classes though, but maybe something to incorporate later.
                    results[column] = h5f[column][()].flatten().tolist()
                except KeyError as err:
                    # Chain the original lookup error to keep the traceback.
                    raise RuntimeError(
                        'column not found in the hdf5 file: {}'.format(column)) from err

        return results
HDF5 Decoder.
Parameters
- target_filename (str): Filename of an HDF5 file to decode.
- output_columns (list): A list of column names that will be selected to appear in the output.
HDF5(target_filename, output_columns)
def __init__(self, target_filename, output_columns):
    """Store the decoder settings after checking that columns were given.

    Parameters
    ----------
    target_filename: str
        Filename of an HDF5 file to decode.
    output_columns: list
        Column names to select; must not be empty.

    Raises
    ------
    RuntimeError
        If `output_columns` has length zero.
    """
    column_count = len(output_columns)
    if column_count == 0:
        error_text = "output_columns cannot be empty."
        # Log before raising so the failure also reaches the module logger.
        logger.error(error_text)
        raise RuntimeError(error_text)

    self.target_filename = target_filename
    self.output_columns = output_columns
    self.output_type = OutputType('sample')
def
parse_sim_output(self, run_info={}):
def parse_sim_output(self, run_info=None):
    """Parses the HDF5 file and converts it to the EasyVVUQ internal
    dictionary based format. Each requested column is read in full,
    flattened to one dimension and stored as a list, so it appears as a
    vector QoI in the output dictionary.

    For example if the file contains the datasets

        a = [1, 3]
        b = [2, 4]

    and both `a` and `b` are specified as `output_columns` the output
    will look as follows: {'a': [1, 3], 'b': [2, 4]}.

    Parameters
    ----------
    run_info: dict
        Information about the run (used to construct the path to the
        HDF5 file that needs decoding). Must contain `run_dir`.

    Returns
    -------
    dict
        Maps each name in `output_columns` to a flat list of values.

    Raises
    ------
    RuntimeError
        If the run directory does not exist or a requested column is
        missing from the file.
    """
    # A fresh dict per call replaces the former shared mutable default;
    # a missing `run_dir` still surfaces as a KeyError, as before.
    if run_info is None:
        run_info = {}
    out_path = self._get_output_path(run_info, self.target_filename)
    results = {}

    with h5py.File(out_path, 'r') as h5f:
        for column in self.output_columns:
            try:
                # TODO: this will always flatten, but HDF5 could handle
                # 2D or 3D arrays as well. Will probably break the analysis
                # classes though, but maybe something to incorporate later.
                results[column] = h5f[column][()].flatten().tolist()
            except KeyError as err:
                # Chain the original lookup error to keep the traceback.
                raise RuntimeError(
                    'column not found in the hdf5 file: {}'.format(column)) from err

    return results
Parses the HDF5 file and converts it to the EasyVVUQ internal dictionary based format. The file is parsed in such a way that each column will appear as a vector QoI in the output dictionary.
For example, if the file contains a column a with the values 1, 3 and a column b with the values 2, 4
And both a and b are specified as output_columns the output will look as follows
{'a': [1, 3], 'b': [2, 4]}.
Parameters
- run_info (dict): Information about the run (used to construct the path to the HDF5 file that needs decoding).