easyvvuq.actions.execute_slurm
Provides a simple action element for interacting with a SLURM job. This lets you execute your simulation on a SLURM cluster.
"""Provides a simple action element for interacting with a SLURM job. This lets you
execute your simulation on a SLURM cluster.
"""

import logging
import re
import subprocess
import time
import os
import random

__license__ = "LGPL"

logger = logging.getLogger(__name__)


class ExecuteSLURM():
    """An Action to launch and track the execution of a SLURM job.

    Parameters
    ----------
    template_script: str
        Filename of a file containing the script template.
    variable: str
        A string to be replaced with the directory in which the job is meant to be executed.
        This is to be used to make sure that the simulation can find the correct input files and knows
        where to put output files.
    """

    def __init__(self, template_script, variable):
        # The template is read once here; start() only substitutes into it.
        with open(template_script, 'r') as fd:
            self.template = fd.read()
        self.script_name = template_script
        self.variable = variable

    def start(self, previous=None):
        """Submit the SLURM job and block until it is no longer listed by ``squeue``.

        The template is copied into the run directory with every occurrence of
        ``self.variable`` replaced by that directory, submitted with ``sbatch``,
        and then polled with ``squeue`` at random intervals of 1 to 600 seconds.

        Parameters
        ----------
        previous: dict
            A dictionary containing information provided by previously executed actions.
            It must contain a ``rundir`` entry naming the directory to run in.

        Returns
        -------
        dict
            The ``previous`` dictionary, unchanged.

        Raises
        ------
        TypeError
            If ``previous`` is None.
        KeyError
            If ``previous`` has no ``rundir`` entry.
        IndexError
            If the ``sbatch`` output contains no digits to use as a job id.
        subprocess.CalledProcessError
            If ``sbatch`` fails, or ``squeue`` fails for any reason other than
            the job id being unknown to the controller.
        """
        if previous is None:
            # Same exception type as the unguarded subscript, but with a
            # message that tells the caller what is actually missing.
            raise TypeError(
                "start() requires a 'previous' dictionary with a 'rundir' entry")
        target_dir = previous['rundir']
        script_name = os.path.join(target_dir, os.path.basename(self.script_name))
        script = self.template.replace(self.variable, target_dir)
        with open(script_name, 'w') as fd:
            fd.write(script)
        result = subprocess.run(
            ['sbatch', script_name],
            cwd=target_dir, check=True, capture_output=True)
        stdout = result.stdout.decode('utf-8')
        # The first run of digits in the sbatch output is taken as the job id.
        self.job_id = re.findall(r'\d+', stdout)[0]
        while True:
            result = subprocess.run(
                ['squeue', '-j', self.job_id],
                cwd=target_dir, check=False, capture_output=True)
            if result.returncode != 0:
                stderr = result.stderr.decode('utf-8', errors='replace')
                # A finished job that has been purged from the controller makes
                # squeue fail with "Invalid job id specified"; that means done.
                if 'Invalid job id' in stderr:
                    break
                raise subprocess.CalledProcessError(
                    result.returncode, result.args,
                    output=result.stdout, stderr=result.stderr)
            stdout = result.stdout.decode('utf-8')
            if self.job_id not in stdout:
                break
            # Random interval so that many concurrent actions do not poll in step.
            time.sleep(random.randint(1, 600))
        return previous

    def finalise(self):
        """Performs clean-up if necessary. Nothing is cleaned up here; returns None.
        """
        return None
logger =
<Logger easyvvuq.actions.execute_slurm (DEBUG)>
class
ExecuteSLURM:
class ExecuteSLURM():
    """An Action to launch and track the execution of a SLURM job.

    Parameters
    ----------
    template_script: str
        Filename of a file containing the script template.
    variable: str
        A string to be replaced with the directory in which the job is meant to be executed.
        This is to be used to make sure that the simulation can find the correct input files and knows
        where to put output files.
    """

    def __init__(self, template_script, variable):
        # The template is read once here; start() only substitutes into it.
        with open(template_script, 'r') as fd:
            self.template = fd.read()
        self.script_name = template_script
        self.variable = variable

    def start(self, previous=None):
        """Submit the SLURM job and block until it is no longer listed by ``squeue``.

        The template is copied into the run directory with every occurrence of
        ``self.variable`` replaced by that directory, submitted with ``sbatch``,
        and then polled with ``squeue`` at random intervals of 1 to 600 seconds.

        Parameters
        ----------
        previous: dict
            A dictionary containing information provided by previously executed actions.
            It must contain a ``rundir`` entry naming the directory to run in.

        Returns
        -------
        dict
            The ``previous`` dictionary, unchanged.

        Raises
        ------
        TypeError
            If ``previous`` is None.
        KeyError
            If ``previous`` has no ``rundir`` entry.
        IndexError
            If the ``sbatch`` output contains no digits to use as a job id.
        subprocess.CalledProcessError
            If ``sbatch`` fails, or ``squeue`` fails for any reason other than
            the job id being unknown to the controller.
        """
        if previous is None:
            # Same exception type as the unguarded subscript, but with a
            # message that tells the caller what is actually missing.
            raise TypeError(
                "start() requires a 'previous' dictionary with a 'rundir' entry")
        target_dir = previous['rundir']
        script_name = os.path.join(target_dir, os.path.basename(self.script_name))
        script = self.template.replace(self.variable, target_dir)
        with open(script_name, 'w') as fd:
            fd.write(script)
        result = subprocess.run(
            ['sbatch', script_name],
            cwd=target_dir, check=True, capture_output=True)
        stdout = result.stdout.decode('utf-8')
        # The first run of digits in the sbatch output is taken as the job id.
        self.job_id = re.findall(r'\d+', stdout)[0]
        while True:
            result = subprocess.run(
                ['squeue', '-j', self.job_id],
                cwd=target_dir, check=False, capture_output=True)
            if result.returncode != 0:
                stderr = result.stderr.decode('utf-8', errors='replace')
                # A finished job that has been purged from the controller makes
                # squeue fail with "Invalid job id specified"; that means done.
                if 'Invalid job id' in stderr:
                    break
                raise subprocess.CalledProcessError(
                    result.returncode, result.args,
                    output=result.stdout, stderr=result.stderr)
            stdout = result.stdout.decode('utf-8')
            if self.job_id not in stdout:
                break
            # Random interval so that many concurrent actions do not poll in step.
            time.sleep(random.randint(1, 600))
        return previous

    def finalise(self):
        """Performs clean-up if necessary. Nothing is cleaned up here; returns None.
        """
        return None
An Action to launch and track the execution of a SLURM job.
Parameters
- template_script (str): Filename of a file containing the script template.
- variable (str): A string to be replaced with the directory in which the job is meant to be executed. This is to be used to make sure that the simulation can find the correct input files and knows where to put output files.
def
start(self, previous=None):
def start(self, previous=None):
    """Submit the SLURM job and block until it is no longer listed by ``squeue``.

    The template is copied into the run directory with every occurrence of
    ``self.variable`` replaced by that directory, submitted with ``sbatch``,
    and then polled with ``squeue`` at random intervals of 1 to 600 seconds.

    Parameters
    ----------
    previous: dict
        A dictionary containing information provided by previously executed actions.
        It must contain a ``rundir`` entry naming the directory to run in.

    Returns
    -------
    dict
        The ``previous`` dictionary, unchanged.

    Raises
    ------
    TypeError
        If ``previous`` is None.
    KeyError
        If ``previous`` has no ``rundir`` entry.
    IndexError
        If the ``sbatch`` output contains no digits to use as a job id.
    subprocess.CalledProcessError
        If ``sbatch`` fails, or ``squeue`` fails for any reason other than
        the job id being unknown to the controller.
    """
    if previous is None:
        # Same exception type as the unguarded subscript, but with a
        # message that tells the caller what is actually missing.
        raise TypeError(
            "start() requires a 'previous' dictionary with a 'rundir' entry")
    target_dir = previous['rundir']
    script_name = os.path.join(target_dir, os.path.basename(self.script_name))
    script = self.template.replace(self.variable, target_dir)
    with open(script_name, 'w') as fd:
        fd.write(script)
    result = subprocess.run(
        ['sbatch', script_name],
        cwd=target_dir, check=True, capture_output=True)
    stdout = result.stdout.decode('utf-8')
    # The first run of digits in the sbatch output is taken as the job id.
    self.job_id = re.findall(r'\d+', stdout)[0]
    while True:
        result = subprocess.run(
            ['squeue', '-j', self.job_id],
            cwd=target_dir, check=False, capture_output=True)
        if result.returncode != 0:
            stderr = result.stderr.decode('utf-8', errors='replace')
            # A finished job that has been purged from the controller makes
            # squeue fail with "Invalid job id specified"; that means done.
            if 'Invalid job id' in stderr:
                break
            raise subprocess.CalledProcessError(
                result.returncode, result.args,
                output=result.stdout, stderr=result.stderr)
        stdout = result.stdout.decode('utf-8')
        if self.job_id not in stdout:
            break
        # Random interval so that many concurrent actions do not poll in step.
        time.sleep(random.randint(1, 600))
    return previous
Start the SLURM job.
Parameters
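- previous (dict): A dictionary containing information provided by previously executed actions. It must contain a `rundir` entry naming the directory in which the job script is written and the job is submitted.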