easyvvuq.actions.execute_slurm

Provides a simple action element for interacting with a SLURM job. This lets you execute your simulation on a SLURM cluster.

 1"""Provides a simple action element for interacting with a SLURM job. This lets you
 2execute your simulation on a SLURM cluster.
 3"""
 4
 5import logging
 6import re
 7import subprocess
 8import time
 9import os
10import random
11
# Licence tag for this module.
__license__ = "LGPL"

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
15
16
class ExecuteSLURM():
    """An Action to launch and track the execution of a SLURM job.

    Parameters
    ----------
    template_script: str
        Filename of a file containing the script template.
    variable: str
        A string to be replaced with the directory in which the job is meant to be executed.
        This is to be used to make sure that the simulation can find the correct input files and knows
        where to put output files.
    """

    def __init__(self, template_script, variable):
        # The template is read once here; `start` substitutes `variable` in it
        # for every run directory.
        with open(template_script, 'r') as fd:
            self.template = fd.read()
        self.script_name = template_script
        self.variable = variable

    def start(self, previous=None):
        """Submit the SLURM job and block until it has left the queue.

        The template is rendered by replacing every occurrence of ``self.variable``
        with the run directory, written into that directory under the template's
        base name and submitted with ``sbatch``. The queue is then polled with
        ``squeue -j <job id>`` until the job is no longer listed.

        Parameters
        ----------
        previous: dict
            A dictionary containing information provided by previously executed actions.
            It must contain the key ``'rundir'``, the directory the job is executed in.

        Returns
        -------
        dict
            The same ``previous`` dictionary that was passed in.

        Raises
        ------
        subprocess.CalledProcessError
            If ``sbatch`` fails, or if ``squeue`` fails for any reason other than
            the job id no longer being known to SLURM.
        IndexError
            If the output of ``sbatch`` contains no job id.
        """
        target_dir = previous['rundir']
        script_name = os.path.join(target_dir, os.path.basename(self.script_name))
        script = self.template.replace(self.variable, target_dir)
        with open(script_name, 'w') as fd:
            fd.write(script)
        result = subprocess.run(
            ['sbatch', script_name],
            cwd=target_dir, check=True, capture_output=True)
        stdout = result.stdout.decode('utf-8')
        # sbatch reports "Submitted batch job <id>"; the first number is the job id.
        self.job_id = re.findall(r'\d+', stdout)[0]
        while True:
            # No check=True here: once a finished job has been purged from the
            # controller, squeue exits non-zero with "Invalid job id specified".
            # That means the job is done, not that polling failed.
            result = subprocess.run(
                ['squeue', '-j', self.job_id],
                cwd=target_dir, capture_output=True)
            if result.returncode != 0:
                stderr = result.stderr.decode('utf-8', errors='replace')
                if 'Invalid job id' in stderr:
                    break
                # Any other failure is still reported as before.
                raise subprocess.CalledProcessError(
                    result.returncode, result.args,
                    output=result.stdout, stderr=result.stderr)
            stdout = result.stdout.decode('utf-8')
            if self.job_id not in stdout:
                break
            # Random delay (1-600 s) so that many concurrent actions do not
            # poll the scheduler in lock-step.
            time.sleep(random.randint(1, 600))
        return previous

    def finalise(self):
        """Perform clean-up if necessary; this action has nothing to clean up.

        Returns
        -------
        None
        """
        return None
logger = <Logger easyvvuq.actions.execute_slurm (DEBUG)>
class ExecuteSLURM:
class ExecuteSLURM():
    """An Action to launch and track the execution of a SLURM job.

    Parameters
    ----------
    template_script: str
        Filename of a file containing the script template.
    variable: str
        A string to be replaced with the directory in which the job is meant to be executed.
        This is to be used to make sure that the simulation can find the correct input files and knows
        where to put output files.
    """

    def __init__(self, template_script, variable):
        # The template is read once here; `start` substitutes `variable` in it
        # for every run directory.
        with open(template_script, 'r') as fd:
            self.template = fd.read()
        self.script_name = template_script
        self.variable = variable

    def start(self, previous=None):
        """Submit the SLURM job and block until it has left the queue.

        The template is rendered by replacing every occurrence of ``self.variable``
        with the run directory, written into that directory under the template's
        base name and submitted with ``sbatch``. The queue is then polled with
        ``squeue -j <job id>`` until the job is no longer listed.

        Parameters
        ----------
        previous: dict
            A dictionary containing information provided by previously executed actions.
            It must contain the key ``'rundir'``, the directory the job is executed in.

        Returns
        -------
        dict
            The same ``previous`` dictionary that was passed in.

        Raises
        ------
        subprocess.CalledProcessError
            If ``sbatch`` fails, or if ``squeue`` fails for any reason other than
            the job id no longer being known to SLURM.
        IndexError
            If the output of ``sbatch`` contains no job id.
        """
        target_dir = previous['rundir']
        script_name = os.path.join(target_dir, os.path.basename(self.script_name))
        script = self.template.replace(self.variable, target_dir)
        with open(script_name, 'w') as fd:
            fd.write(script)
        result = subprocess.run(
            ['sbatch', script_name],
            cwd=target_dir, check=True, capture_output=True)
        stdout = result.stdout.decode('utf-8')
        # sbatch reports "Submitted batch job <id>"; the first number is the job id.
        self.job_id = re.findall(r'\d+', stdout)[0]
        while True:
            # No check=True here: once a finished job has been purged from the
            # controller, squeue exits non-zero with "Invalid job id specified".
            # That means the job is done, not that polling failed.
            result = subprocess.run(
                ['squeue', '-j', self.job_id],
                cwd=target_dir, capture_output=True)
            if result.returncode != 0:
                stderr = result.stderr.decode('utf-8', errors='replace')
                if 'Invalid job id' in stderr:
                    break
                # Any other failure is still reported as before.
                raise subprocess.CalledProcessError(
                    result.returncode, result.args,
                    output=result.stdout, stderr=result.stderr)
            stdout = result.stdout.decode('utf-8')
            if self.job_id not in stdout:
                break
            # Random delay (1-600 s) so that many concurrent actions do not
            # poll the scheduler in lock-step.
            time.sleep(random.randint(1, 600))
        return previous

    def finalise(self):
        """Perform clean-up if necessary; this action has nothing to clean up.

        Returns
        -------
        None
        """
        return None

An Action to launch and track the execution of a SLURM job.

Parameters
  • template_script (str): Filename of a file containing the script template.
  • variable (str): A string to be replaced with the directory in which the job is meant to be executed. This is to be used to make sure that the simulation can find the correct input files and knows where to put output files.
ExecuteSLURM(template_script, variable)
31    def __init__(self, template_script, variable):
32        with open(template_script, 'r') as fd:
33            self.template = fd.read()
34        self.script_name = template_script
35        self.variable = variable
script_name
variable
def start(self, previous=None):
37    def start(self, previous=None):
38        """Start the SLURM job.
39
40        Parameters
41        ----------
42        previous: dict
43            A dictionary containing information provided by previously executed actions.
44        """
45        target_dir = previous['rundir']
46        script_name = os.path.join(target_dir, os.path.basename(self.script_name))
47        script, = self.template.replace(self.variable, target_dir),
48        with open(script_name, 'w') as fd:
49            fd.write(script)
50        result = subprocess.run(
51            ['sbatch', script_name],
52            cwd=target_dir, check=True, capture_output=True)
53        stdout = result.stdout.decode('utf-8')
54        self.job_id = re.findall(r'\d+', stdout)[0]
55        while True:
56            result = subprocess.run(
57                ['squeue', '-j', self.job_id],
58                cwd=target_dir, check=True, capture_output=True)
59            stdout = result.stdout.decode('utf-8')
60            if self.job_id not in stdout:
61                break
62            time.sleep(random.randint(1, 600))
63        return previous

Start the SLURM job.

Parameters
  • previous (dict): A dictionary containing information provided by previously executed actions. It must contain the key 'rundir', the directory in which the job is executed.
def finalise(self):
65    def finalise(self):
66        """Performs clean-up if necessary. In this case it isn't. I think.
67        """
68        return None

Performs clean-up if necessary. This action has nothing to clean up, so the method does nothing and returns None.