easyvvuq.decoders.robust_csv
A Decoder for CSV format files.
"""A Decoder for CSV format files.
"""

import os
import logging
import csv
from easyvvuq import OutputType

__copyright__ = """

    Copyright 2018 Robin A. Richardson, David W. Wright, Juraj Kardos

    This file is part of EasyVVUQ

    EasyVVUQ is free software: you can redistribute it and/or modify
    it under the terms of the Lesser GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EasyVVUQ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    Lesser GNU General Public License for more details.

    You should have received a copy of the Lesser GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
__license__ = "LGPL"


# Loggers must come from the logging registry: a directly instantiated
# ``logging.Logger`` is detached from the hierarchy, so handlers and levels
# configured by the application would never apply to it.
logger = logging.getLogger(__name__)


class RobustCSV:
    """CSV Decoder that tolerates missing output files.

    When the CSV file of a run is absent, the same file is looked up in
    neighbouring ``run_<id>`` directories and NaN-filled columns with the row
    count of the first neighbour found are returned instead.

    Parameters
    ----------
    target_filename: str
        Filename of a CSV file to decode.
    output_columns: list
        A list of column names that will be selected to appear in the output.
    dialect: str
        Dialect passed unchanged to `csv.DictReader` (defaults to 'excel').

    Raises
    ------
    RuntimeError
        If `output_columns` is empty.
    """
    def __init__(self, target_filename, output_columns, dialect='excel'):
        if len(output_columns) == 0:
            msg = "output_columns cannot be empty."
            logger.error(msg)
            raise RuntimeError(msg)
        self.target_filename = target_filename
        self.output_columns = output_columns
        self.output_type = OutputType('sample')
        self.dialect = dialect

    @staticmethod
    def _get_output_path(run_info=None, outfile=None):
        """Constructs the path of the output file from `outfile` and the `run_dir`
        entry of `run_info`.

        Parameters
        ----------
        run_info: dict
            Run info as retrieved from the database. Must contain 'run_dir'.
        outfile: str
            Filename of the file to be parsed.

        Returns
        -------
        str
            `outfile` joined onto the run directory.

        Raises
        ------
        RuntimeError
            If the run directory does not exist.
        """
        run_path = run_info['run_dir']
        if not os.path.isdir(run_path):
            raise RuntimeError(f"Run directory does not exist: {run_path}")
        return os.path.join(run_path, outfile)

    def parse_sim_output(self, run_info={}):
        """Parses the CSV file and converts it to the EasyVVUQ internal dictionary based
        format. The file is parsed in such a way that each column will appear as a vector
        QoI in the output dictionary.

        For example if the file contains the following data
        a,b
        1,2
        3,4

        And both `a` and `b` are specified as `output_columns` the output will look as follows
        {'a': [1.0, 3.0], 'b': [2.0, 4.0]}.

        Values that cannot be converted to `float` are kept as strings.

        If the file does not exist (e.g. the simulation failed), the directories
        ``run_(id-10)`` to ``run_(id+10)`` (both inclusive) next to the run directory
        are searched for the same file. The first one found only provides the number
        of rows; every selected column is then filled with that many NaN values.

        Parameters
        ----------
        run_info: dict
            Information about the run (used to construct the path to the CSV file
            that needs decoding). Must contain 'run_dir'.

        Returns
        -------
        dict
            Maps each name in `output_columns` to the list of values of that column.

        Raises
        ------
        RuntimeError
            If the run directory does not exist, if a selected column is missing from
            the CSV file, or if neither the run nor any searched neighbour has the file.
        IndexError, ValueError
            If the file is missing and the run directory is not named ``run_<integer>``.
        """
        out_path = self._get_output_path(run_info, self.target_filename)

        results = {column: [] for column in self.output_columns}

        if os.path.isfile(out_path):
            with open(out_path, 'r', newline='') as csvfile:
                reader = csv.DictReader(csvfile, dialect=self.dialect)
                for row in reader:
                    for column in self.output_columns:
                        try:
                            value = row[column]
                        except KeyError:
                            raise RuntimeError(
                                'column not found in the csv file: {}'.format(column)) from None
                        try:
                            results[column].append(float(value))
                        except ValueError:
                            # Non-numeric cells are passed through untouched.
                            results[column].append(value)
            return results

        # The simulation produced no output: fall back to NaN values.
        print(f"Ouput file {out_path} does not exist, using NaN values")
        run_prefix, run_dir = os.path.split(run_info['run_dir'])
        if not run_dir:
            # A trailing separator leaves an empty tail; split once more.
            run_prefix, run_dir = os.path.split(run_prefix)
        run_id = int(run_dir.split("_")[1])  # e.g. run_123 -> 123

        # A neighbouring run is read only to learn the row count, so the
        # NaN-filled output has the same dimension as a valid result.
        for offset in range(-10, 11):
            out_path_aux = os.path.join(
                run_prefix, "run_" + str(run_id + offset), self.target_filename)
            print(f"Testing existence of file {out_path_aux}")
            if not os.path.isfile(out_path_aux):
                continue
            print(f"Reading file {out_path_aux} in order to have the appropriate dimension of NaN values")
            with open(out_path_aux, 'r', newline='') as csvfile:
                no_lines = sum(1 for _ in csv.DictReader(csvfile, dialect=self.dialect))
            for column in self.output_columns:
                results[column] = [float("nan")] * no_lines
            return results

        raise RuntimeError('Could not find valid output csv file in vicinity of: {}'.format(out_path))
logger =
<Logger easyvvuq.decoders.robust_csv (NOTSET)>
class
RobustCSV:
class RobustCSV:
    """CSV Decoder that tolerates missing output files.

    When the CSV file of a run is absent, the same file is looked up in
    neighbouring ``run_<id>`` directories and NaN-filled columns with the row
    count of the first neighbour found are returned instead.

    Parameters
    ----------
    target_filename: str
        Filename of a CSV file to decode.
    output_columns: list
        A list of column names that will be selected to appear in the output.
    dialect: str
        Dialect passed unchanged to `csv.DictReader` (defaults to 'excel').

    Raises
    ------
    RuntimeError
        If `output_columns` is empty.
    """
    def __init__(self, target_filename, output_columns, dialect='excel'):
        if len(output_columns) == 0:
            msg = "output_columns cannot be empty."
            logger.error(msg)
            raise RuntimeError(msg)
        self.target_filename = target_filename
        self.output_columns = output_columns
        self.output_type = OutputType('sample')
        self.dialect = dialect

    @staticmethod
    def _get_output_path(run_info=None, outfile=None):
        """Constructs the path of the output file from `outfile` and the `run_dir`
        entry of `run_info`.

        Parameters
        ----------
        run_info: dict
            Run info as retrieved from the database. Must contain 'run_dir'.
        outfile: str
            Filename of the file to be parsed.

        Returns
        -------
        str
            `outfile` joined onto the run directory.

        Raises
        ------
        RuntimeError
            If the run directory does not exist.
        """
        run_path = run_info['run_dir']
        if not os.path.isdir(run_path):
            raise RuntimeError(f"Run directory does not exist: {run_path}")
        return os.path.join(run_path, outfile)

    def parse_sim_output(self, run_info={}):
        """Parses the CSV file and converts it to the EasyVVUQ internal dictionary based
        format. The file is parsed in such a way that each column will appear as a vector
        QoI in the output dictionary.

        For example if the file contains the following data
        a,b
        1,2
        3,4

        And both `a` and `b` are specified as `output_columns` the output will look as follows
        {'a': [1.0, 3.0], 'b': [2.0, 4.0]}.

        Values that cannot be converted to `float` are kept as strings.

        If the file does not exist (e.g. the simulation failed), the directories
        ``run_(id-10)`` to ``run_(id+10)`` (both inclusive) next to the run directory
        are searched for the same file. The first one found only provides the number
        of rows; every selected column is then filled with that many NaN values.

        Parameters
        ----------
        run_info: dict
            Information about the run (used to construct the path to the CSV file
            that needs decoding). Must contain 'run_dir'.

        Returns
        -------
        dict
            Maps each name in `output_columns` to the list of values of that column.

        Raises
        ------
        RuntimeError
            If the run directory does not exist, if a selected column is missing from
            the CSV file, or if neither the run nor any searched neighbour has the file.
        IndexError, ValueError
            If the file is missing and the run directory is not named ``run_<integer>``.
        """
        out_path = self._get_output_path(run_info, self.target_filename)

        results = {column: [] for column in self.output_columns}

        if os.path.isfile(out_path):
            with open(out_path, 'r', newline='') as csvfile:
                reader = csv.DictReader(csvfile, dialect=self.dialect)
                for row in reader:
                    for column in self.output_columns:
                        try:
                            value = row[column]
                        except KeyError:
                            raise RuntimeError(
                                'column not found in the csv file: {}'.format(column)) from None
                        try:
                            results[column].append(float(value))
                        except ValueError:
                            # Non-numeric cells are passed through untouched.
                            results[column].append(value)
            return results

        # The simulation produced no output: fall back to NaN values.
        print(f"Ouput file {out_path} does not exist, using NaN values")
        run_prefix, run_dir = os.path.split(run_info['run_dir'])
        if not run_dir:
            # A trailing separator leaves an empty tail; split once more.
            run_prefix, run_dir = os.path.split(run_prefix)
        run_id = int(run_dir.split("_")[1])  # e.g. run_123 -> 123

        # A neighbouring run is read only to learn the row count, so the
        # NaN-filled output has the same dimension as a valid result.
        for offset in range(-10, 11):
            out_path_aux = os.path.join(
                run_prefix, "run_" + str(run_id + offset), self.target_filename)
            print(f"Testing existence of file {out_path_aux}")
            if not os.path.isfile(out_path_aux):
                continue
            print(f"Reading file {out_path_aux} in order to have the appropriate dimension of NaN values")
            with open(out_path_aux, 'r', newline='') as csvfile:
                no_lines = sum(1 for _ in csv.DictReader(csvfile, dialect=self.dialect))
            for column in self.output_columns:
                results[column] = [float("nan")] * no_lines
            return results

        raise RuntimeError('Could not find valid output csv file in vicinity of: {}'.format(out_path))
CSV Decoder.
Parameters
- target_filename (str): Filename of a CSV file to decode.
- output_columns (list): A list of column names that will be selected to appear in the output.
RobustCSV(target_filename, output_columns, dialect='excel')
def __init__(self, target_filename, output_columns, dialect='excel'):
    """Store the decoder configuration.

    Parameters
    ----------
    target_filename: str
        Filename of a CSV file to decode.
    output_columns: list
        A list of column names that will be selected to appear in the output.
    dialect: str
        CSV dialect, stored on the instance as `self.dialect`.

    Raises
    ------
    RuntimeError
        If `output_columns` is empty.
    """
    # Reject an empty column selection up front, logging before raising.
    if not len(output_columns):
        message = "output_columns cannot be empty."
        logger.error(message)
        raise RuntimeError(message)

    self.output_columns = output_columns
    self.target_filename = target_filename
    self.output_type = OutputType('sample')
    self.dialect = dialect
def
parse_sim_output(self, run_info={}):
def parse_sim_output(self, run_info={}):
    """Parses the CSV file and converts it to the EasyVVUQ internal dictionary based
    format. The file is parsed in such a way that each column will appear as a vector
    QoI in the output dictionary.

    For example if the file contains the following data
    a,b
    1,2
    3,4

    And both `a` and `b` are specified as `output_columns` the output will look as follows
    {'a': [1.0, 3.0], 'b': [2.0, 4.0]}.

    Values that cannot be converted to `float` are kept as strings.

    If the file does not exist (e.g. the simulation failed), the directories
    ``run_(id-10)`` to ``run_(id+10)`` (both inclusive) next to the run directory
    are searched for the same file. The first one found only provides the number
    of rows; every selected column is then filled with that many NaN values.

    Parameters
    ----------
    run_info: dict
        Information about the run (used to construct the path to the CSV file
        that needs decoding). Must contain 'run_dir'.

    Returns
    -------
    dict
        Maps each name in `output_columns` to the list of values of that column.

    Raises
    ------
    RuntimeError
        If a selected column is missing from the CSV file, or if neither the run
        nor any searched neighbour has the file.
    IndexError, ValueError
        If the file is missing and the run directory is not named ``run_<integer>``.
    """
    out_path = self._get_output_path(run_info, self.target_filename)

    results = {column: [] for column in self.output_columns}

    if os.path.isfile(out_path):
        with open(out_path, 'r', newline='') as csvfile:
            reader = csv.DictReader(csvfile, dialect=self.dialect)
            for row in reader:
                for column in self.output_columns:
                    try:
                        value = row[column]
                    except KeyError:
                        raise RuntimeError(
                            'column not found in the csv file: {}'.format(column)) from None
                    try:
                        results[column].append(float(value))
                    except ValueError:
                        # Non-numeric cells are passed through untouched.
                        results[column].append(value)
        return results

    # The simulation produced no output: fall back to NaN values.
    print(f"Ouput file {out_path} does not exist, using NaN values")
    run_prefix, run_dir = os.path.split(run_info['run_dir'])
    if not run_dir:
        # A trailing separator leaves an empty tail; split once more.
        run_prefix, run_dir = os.path.split(run_prefix)
    run_id = int(run_dir.split("_")[1])  # e.g. run_123 -> 123

    # A neighbouring run is read only to learn the row count, so the
    # NaN-filled output has the same dimension as a valid result.
    for offset in range(-10, 11):
        out_path_aux = os.path.join(
            run_prefix, "run_" + str(run_id + offset), self.target_filename)
        print(f"Testing existence of file {out_path_aux}")
        if not os.path.isfile(out_path_aux):
            continue
        print(f"Reading file {out_path_aux} in order to have the appropriate dimension of NaN values")
        with open(out_path_aux, 'r', newline='') as csvfile:
            no_lines = sum(1 for _ in csv.DictReader(csvfile, dialect=self.dialect))
        for column in self.output_columns:
            results[column] = [float("nan")] * no_lines
        return results

    raise RuntimeError('Could not find valid output csv file in vicinity of: {}'.format(out_path))
Parses the CSV file and converts it to the EasyVVUQ internal dictionary based format. The file is parsed in such a way that each column will appear as a vector QoI in the output dictionary.
For example, if the file contains the following data:

    a,b
    1,2
    3,4
And both a and b are specified as output_columns the output will look as follows
{'a': [1, 3], 'b': [2, 4]}.
Parameters
- run_info (dict): Information about the run (used to construct the absolute path to the CSV file that needs decoding).