easyvvuq.decoders.robust_csv

A Decoder for CSV format files.

  1"""A Decoder for CSV format files.
  2"""
  3
  4import os
  5import logging
  6import csv
  7from easyvvuq import OutputType
  8
  9__copyright__ = """
 10
 11    Copyright 2018 Robin A. Richardson, David W. Wright, Juraj Kardos
 12
 13    This file is part of EasyVVUQ
 14
 15    EasyVVUQ is free software: you can redistribute it and/or modify
 16    it under the terms of the Lesser GNU General Public License as published by
 17    the Free Software Foundation, either version 3 of the License, or
 18    (at your option) any later version.
 19
 20    EasyVVUQ is distributed in the hope that it will be useful,
 21    but WITHOUT ANY WARRANTY; without even the implied warranty of
 22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 23    Lesser GNU General Public License for more details.
 24
 25    You should have received a copy of the Lesser GNU General Public License
 26    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 27
 28"""
 29__license__ = "LGPL"
 30
 31
 32logger = logging.Logger(__name__)
 33
 34
 35class RobustCSV:
 36    """CSV Decoder.
 37
 38    Parameters
 39    ----------
 40    target_filename: str
 41        Filename of a CSV file to decode.
 42    ouput_columns: list
 43        A list of column names that will be selected to appear in the output.
 44    """
 45    def __init__(self, target_filename, output_columns, dialect='excel'):
 46        if len(output_columns) == 0:
 47            msg = "output_columns cannot be empty."
 48            logger.error(msg)
 49            raise RuntimeError(msg)
 50        self.target_filename = target_filename
 51        self.output_columns = output_columns
 52        self.output_type = OutputType('sample')
 53        self.dialect = dialect
 54
 55    @staticmethod
 56    def _get_output_path(run_info=None, outfile=None):
 57        """Constructs absolute path from the `target_filename` and the `run_dir` parameter
 58        in the `run_info` retrieved from the database.
 59        
 60        Parameters
 61        ----------
 62        run_info: dict
 63            Run info as retrieved from the database.
 64        outfile: str
 65            Filename of the file to be parsed.
 66
 67        Returns
 68        -------
 69        str
 70            An absolute path to the output file in the run directory.
 71        """
 72        run_path = run_info['run_dir']
 73        if not os.path.isdir(run_path):
 74            raise RuntimeError(f"Run directory does not exist: {run_path}")
 75        return os.path.join(run_path, outfile)
 76
 77    def parse_sim_output(self, run_info={}):
 78        """Parses the CSV file and converts it to the EasyVVUQ internal dictionary based
 79        format. The file is parsed in such a way that each column will appear as a vector
 80        QoI in the output dictionary.
 81
 82        For example if the file contains the following data
 83        a,b
 84        1,2
 85        3,4
 86
 87        And both `a` and `b` are specified as `output_columns` the output will look as follows
 88        {'a': [1, 3], 'b': [2, 4]}.
 89
 90        Parameters
 91        ----------
 92        run_info: dict
 93            Information about the run (used to retrieve construct the absolute path
 94            to the CSV file that needs decoding.
 95        """
 96        out_path = self._get_output_path(run_info, self.target_filename)
 97
 98        results = {}
 99        for column in self.output_columns:
100            results[column] = []
101
102        # Test if the ouput file exists
103        # e.g. the simulation could have failed
104        # thus no output was produced, fill in with Nan if missing
105        if not os.path.isfile(out_path):
106            print(f"Ouput file {out_path} does not exist, using NaN values")            
107            run_path = run_info['run_dir'] #e.g xxx/xxx/xxx/run_123            
108            run_prefix = "/".join(run_path.split("/")[0:-1]) #e.g xxx/xxx/xxx
109            run_dir = run_path.split("/")[-1] #e.g run_123
110            run_id = int(run_dir.split("_")[1]) #e.g. 123
111
112            # Test if some of nearby valid ouput file exists,
113            # explore range run_(id-10) -- run_(id+10)
114            # We read such file instead, and use NaN values instead of
115            # the acutal values, in this way the output will have 
116            # the correct data dimension, but filled with NaN
117            counter = -10
118            while counter < 10:
119                out_path_aux = "/".join([run_prefix, "run_"+str(run_id+counter), self.target_filename])
120                print(f"Testing existence of file {out_path_aux}")
121                if not os.path.isfile(out_path_aux):
122                    counter = counter + 1
123                    continue
124                else:
125                    print(f"Reading file {out_path_aux} in order to have the appropriate dimension of NaN values")
126                    with open(out_path_aux, 'r', newline='') as csvfile:
127                        reader = csv.DictReader(csvfile, dialect=self.dialect)
128                        no_lines = len(list(reader))
129                        for i in range(0, no_lines):
130                            for column in self.output_columns:
131                                results[column].append(float("nan"))
132                    break
133
134            if counter == 10:
135                raise RuntimeError('Could not find valid output csv file in vicinity of: {}'.format(out_path))
136
137            
138        else:
139            with open(out_path, 'r', newline='') as csvfile:
140                reader = csv.DictReader(csvfile, dialect=self.dialect)
141                for row in reader:
142                    for column in self.output_columns:
143                        try:
144                            results[column].append(float(row[column]))
145                        except ValueError:
146                            results[column].append(row[column])
147                        except KeyError:
148                            raise RuntimeError('column not found in the csv file: {}'.format(column))
149
150        return results
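
The fallback branch above reads a neighbouring run's CSV file only to determine how many rows of NaN to emit for the failed run. A minimal sketch of that behaviour, assuming a hypothetical campaign layout with runs named run_1 and run_2 (the directory structure, file name and column names are illustrative only):

import os
import tempfile

from easyvvuq.decoders.robust_csv import RobustCSV

# Hypothetical campaign layout: run_1 failed (no output.csv), run_2 succeeded.
base = tempfile.mkdtemp()
os.makedirs(os.path.join(base, "run_1"))
os.makedirs(os.path.join(base, "run_2"))
with open(os.path.join(base, "run_2", "output.csv"), "w", newline="") as fh:
    fh.write("a,b\n1,2\n3,4\n")

decoder = RobustCSV(target_filename="output.csv", output_columns=["a", "b"])

# run_1 has no output.csv, so run_2 is used only to size the result:
# two rows per column, all filled with NaN.
result = decoder.parse_sim_output(run_info={"run_dir": os.path.join(base, "run_1")})
print(result)  # {'a': [nan, nan], 'b': [nan, nan]}
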
logger = <Logger easyvvuq.decoders.robust_csv (NOTSET)>
class RobustCSV:

CSV Decoder.

Parameters
  • target_filename (str): Filename of a CSV file to decode.
  • output_columns (list): A list of column names that will be selected to appear in the output.
RobustCSV(target_filename, output_columns, dialect='excel')
target_filename
output_columns
output_type
dialect
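
Given the parameters and attributes above, a minimal construction sketch (the file name and column names are illustrative only):

from easyvvuq.decoders.robust_csv import RobustCSV

# Select columns "a" and "b" from each run's output.csv;
# 'excel' is the default csv.DictReader dialect (comma-separated).
decoder = RobustCSV(target_filename="output.csv",
                    output_columns=["a", "b"],
                    dialect="excel")

# An empty column list is rejected at construction time:
# RobustCSV(target_filename="output.csv", output_columns=[]) raises RuntimeError.
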
def parse_sim_output(self, run_info={}):

Parses the CSV file and converts it to the EasyVVUQ internal dictionary-based format. The file is parsed in such a way that each column will appear as a vector QoI in the output dictionary.

For example, if the file contains the following data

  a,b
  1,2
  3,4

and both a and b are specified as output_columns, the output will look as follows: {'a': [1, 3], 'b': [2, 4]}.

Parameters
  • run_info (dict): Information about the run (used to construct the absolute path to the CSV file that needs decoding).
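
A short usage sketch of the happy path, assuming a hypothetical run directory that already contains the two-column file from the example above (paths and file contents are illustrative only):

import os
import tempfile

from easyvvuq.decoders.robust_csv import RobustCSV

# Hypothetical run directory with the decoder's target file present.
run_dir = tempfile.mkdtemp()
with open(os.path.join(run_dir, "output.csv"), "w", newline="") as fh:
    fh.write("a,b\n1,2\n3,4\n")

decoder = RobustCSV(target_filename="output.csv", output_columns=["a", "b"])
result = decoder.parse_sim_output(run_info={"run_dir": run_dir})
print(result)  # {'a': [1.0, 3.0], 'b': [2.0, 4.0]}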