easyvvuq.decoders.hdf5

A Decoder for HDF5 format files.

View Source

  1"""A Decoder for HDF5 format files.
  2"""
  3
  4import os
  5import logging
  6import h5py
  7from easyvvuq import OutputType
  8
  9__copyright__ = """
 10
 11    Copyright 2018 Robin A. Richardson, David W. Wright
 12
 13    This file is part of EasyVVUQ
 14
 15    EasyVVUQ is free software: you can redistribute it and/or modify
 16    it under the terms of the Lesser GNU General Public License as published by
 17    the Free Software Foundation, either version 3 of the License, or
 18    (at your option) any later version.
 19
 20    EasyVVUQ is distributed in the hope that it will be useful,
 21    but WITHOUT ANY WARRANTY; without even the implied warranty of
 22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 23    Lesser GNU General Public License for more details.
 24
 25    You should have received a copy of the Lesser GNU General Public License
 26    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 27
 28"""
 29__license__ = "LGPL"
 30
 31
 32logger = logging.Logger(__name__)
 33
 34
 35class HDF5:
 36    """HDF5 Decoder.
 37
 38    Parameters
 39    ----------
 40    target_filename: str
 41        Filename of an HDF5 file to decode.
 42    ouput_columns: list
 43        A list of column names that will be selected to appear in the output.
 44    """
 45
 46    def __init__(self, target_filename, output_columns):
 47        if len(output_columns) == 0:
 48            msg = "output_columns cannot be empty."
 49            logger.error(msg)
 50            raise RuntimeError(msg)
 51        self.target_filename = target_filename
 52        self.output_columns = output_columns
 53        self.output_type = OutputType('sample')
 54
 55    @staticmethod
 56    def _get_output_path(run_info=None, outfile=None):
 57        """Constructs absolute path from the `target_filename` and the `run_dir` parameter
 58        in the `run_info` retrieved from the database.
 59
 60        Parameters
 61        ----------
 62        run_info: dict
 63            Run info as retrieved from the database.
 64        outfile: str
 65            Filename of the file to be parsed.
 66
 67        Returns
 68        -------
 69        str
 70            An absolute path to the output file in the run directory.
 71        """
 72        run_path = run_info['run_dir']
 73        if not os.path.isdir(run_path):
 74            raise RuntimeError(f"Run directory does not exist: {run_path}")
 75        return os.path.join(run_path, outfile)
 76
 77    def parse_sim_output(self, run_info={}):
 78        """Parses the HDF5 file and converts it to the EasyVVUQ internal dictionary based
 79        format. The file is parsed in such a way that each column will appear as a vector
 80        QoI in the output dictionary.
 81
 82        For example if the file contains the following data
 83        a,b
 84        1,2
 85        3,4
 86
 87        And both `a` and `b` are specified as `output_columns` the output will look as follows
 88        {'a': [1, 3], 'b': [2, 4]}.
 89
 90        Parameters
 91        ----------
 92        run_info: dict
 93            Information about the run (used to retrieve construct the absolute path
 94            to the CSV file that needs decoding.
 95        """
 96        out_path = self._get_output_path(run_info, self.target_filename)
 97        results = {}
 98
 99        with h5py.File(out_path, 'r') as h5f:
100            for column in self.output_columns:
101                try:
102                    # TODO: this will always flatten, but HDF5 could handle
103                    # 2D or 3D arrays as well. Will probably break the analysis
104                    # classes though, but maybe something to incorporate later.
105                    results[column] = h5f[column][()].flatten().tolist()
106                except KeyError:
107                    raise RuntimeError('column not found in the hdf5 file: {}'.format(column))
108
109        return results

logger = <Logger easyvvuq.decoders.hdf5 (NOTSET)>

class HDF5: View Source

 36class HDF5:
 37    """HDF5 Decoder.
 38
 39    Parameters
 40    ----------
 41    target_filename: str
 42        Filename of an HDF5 file to decode.
 43    ouput_columns: list
 44        A list of column names that will be selected to appear in the output.
 45    """
 46
 47    def __init__(self, target_filename, output_columns):
 48        if len(output_columns) == 0:
 49            msg = "output_columns cannot be empty."
 50            logger.error(msg)
 51            raise RuntimeError(msg)
 52        self.target_filename = target_filename
 53        self.output_columns = output_columns
 54        self.output_type = OutputType('sample')
 55
 56    @staticmethod
 57    def _get_output_path(run_info=None, outfile=None):
 58        """Constructs absolute path from the `target_filename` and the `run_dir` parameter
 59        in the `run_info` retrieved from the database.
 60
 61        Parameters
 62        ----------
 63        run_info: dict
 64            Run info as retrieved from the database.
 65        outfile: str
 66            Filename of the file to be parsed.
 67
 68        Returns
 69        -------
 70        str
 71            An absolute path to the output file in the run directory.
 72        """
 73        run_path = run_info['run_dir']
 74        if not os.path.isdir(run_path):
 75            raise RuntimeError(f"Run directory does not exist: {run_path}")
 76        return os.path.join(run_path, outfile)
 77
 78    def parse_sim_output(self, run_info={}):
 79        """Parses the HDF5 file and converts it to the EasyVVUQ internal dictionary based
 80        format. The file is parsed in such a way that each column will appear as a vector
 81        QoI in the output dictionary.
 82
 83        For example if the file contains the following data
 84        a,b
 85        1,2
 86        3,4
 87
 88        And both `a` and `b` are specified as `output_columns` the output will look as follows
 89        {'a': [1, 3], 'b': [2, 4]}.
 90
 91        Parameters
 92        ----------
 93        run_info: dict
 94            Information about the run (used to retrieve construct the absolute path
 95            to the CSV file that needs decoding.
 96        """
 97        out_path = self._get_output_path(run_info, self.target_filename)
 98        results = {}
 99
100        with h5py.File(out_path, 'r') as h5f:
101            for column in self.output_columns:
102                try:
103                    # TODO: this will always flatten, but HDF5 could handle
104                    # 2D or 3D arrays as well. Will probably break the analysis
105                    # classes though, but maybe something to incorporate later.
106                    results[column] = h5f[column][()].flatten().tolist()
107                except KeyError:
108                    raise RuntimeError('column not found in the hdf5 file: {}'.format(column))
109
110        return results

HDF5 Decoder.

Parameters

target_filename (str): Filename of an HDF5 file to decode.
output_columns (list): A list of column names that will be selected to appear in the output.

HDF5(target_filename, output_columns) View Source

47    def __init__(self, target_filename, output_columns):
48        if len(output_columns) == 0:
49            msg = "output_columns cannot be empty."
50            logger.error(msg)
51            raise RuntimeError(msg)
52        self.target_filename = target_filename
53        self.output_columns = output_columns
54        self.output_type = OutputType('sample')

target_filename

output_columns

output_type

def parse_sim_output(self, run_info={}): View Source

 78    def parse_sim_output(self, run_info={}):
 79        """Parses the HDF5 file and converts it to the EasyVVUQ internal dictionary based
 80        format. The file is parsed in such a way that each column will appear as a vector
 81        QoI in the output dictionary.
 82
 83        For example if the file contains the following data
 84        a,b
 85        1,2
 86        3,4
 87
 88        And both `a` and `b` are specified as `output_columns` the output will look as follows
 89        {'a': [1, 3], 'b': [2, 4]}.
 90
 91        Parameters
 92        ----------
 93        run_info: dict
 94            Information about the run (used to retrieve construct the absolute path
 95            to the CSV file that needs decoding.
 96        """
 97        out_path = self._get_output_path(run_info, self.target_filename)
 98        results = {}
 99
100        with h5py.File(out_path, 'r') as h5f:
101            for column in self.output_columns:
102                try:
103                    # TODO: this will always flatten, but HDF5 could handle
104                    # 2D or 3D arrays as well. Will probably break the analysis
105                    # classes though, but maybe something to incorporate later.
106                    results[column] = h5f[column][()].flatten().tolist()
107                except KeyError:
108                    raise RuntimeError('column not found in the hdf5 file: {}'.format(column))
109
110        return results

Parses the HDF5 file and converts it to the EasyVVUQ internal dictionary based format. The file is parsed in such a way that each column will appear as a vector QoI in the output dictionary.

For example, suppose the file contains the following data (one column per dataset): a,b / 1,2 / 3,4

If both a and b are specified as output_columns, the output will look as follows: {'a': [1, 3], 'b': [2, 4]}.

Parameters

run_info (dict): Information about the run (used to construct the path to the HDF5 file that needs decoding).