Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 27 additions & 11 deletions dpdispatcher/contexts/dp_cloud_server_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,25 +289,38 @@ def machine_subfields(cls) -> List[Argument]:
list[Argument]
machine subfields
"""
doc_remote_profile = (
"The information used to maintain the connection with remote machine."
)
doc_retry_count = "The retry count when a job is terminated"
doc_ignore_exit_code = """The job state will be marked as finished if the exit code is non-zero when set to True. Otherwise,
the job state will be designated as terminated."""
doc_remote_profile = "Configuration for Bohrium submission, including login credentials, project selection, and job-handling behavior."
doc_retry_count = "How many times a terminated remote job is retried on the platform side before giving up."
doc_ignore_exit_code = """Whether a non-zero exit code from the remote platform is still treated as finished. If False, such jobs are marked as terminated."""
return [
Argument(
"remote_profile",
dict,
[
Argument("email", str, optional=True, doc="Email"),
Argument("password", str, optional=True, doc="Password"),
Argument(
"email",
str,
optional=True,
doc="Email address used to log in to Bohrium.",
),
Argument(
"password",
str,
optional=True,
doc="Password used together with email or phone login. If BOHR_TICKET is set, password-based login can be skipped.",
),
Argument(
"phone",
str,
optional=True,
doc="Phone number used to log in when email is not used.",
),
Argument(
"program_id",
int,
optional=False,
alias=["project_id"],
doc="Program ID",
doc="Program / project ID used to place uploaded jobs under the correct Bohrium project namespace.",
),
Argument(
"retry_count",
Expand All @@ -327,10 +340,13 @@ def machine_subfields(cls) -> List[Argument]:
"keep_backup",
bool,
optional=True,
doc="keep download and upload zip",
doc="Whether to keep uploaded/downloaded zip archives in the local backup directory after transfer.",
),
Argument(
"input_data", dict, optional=False, doc="Configuration of job"
"input_data",
dict,
optional=False,
doc="Platform-specific job configuration passed through to the Bohrium API.",
),
],
doc=doc_remote_profile,
Expand Down
4 changes: 2 additions & 2 deletions dpdispatcher/contexts/local_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ def machine_subfields(cls) -> List[Argument]:
list[Argument]
machine subfields
"""
doc_remote_profile = "The information used to maintain the local machine."
doc_remote_profile = "Options controlling how files are staged between local_root and remote_root when both paths are on the local filesystem."
return [
Argument(
"remote_profile",
Expand All @@ -372,7 +372,7 @@ def machine_subfields(cls) -> List[Argument]:
bool,
optional=True,
default=True,
doc="Whether to use symbolic links to replace copy. This option should be turned off if the local directory is not accessible on the Batch system.",
doc="Whether to use symbolic links instead of copying files from local_root into remote_root. Disable this when the execution side cannot access the original local path through the same filesystem view.",
),
],
)
Expand Down
35 changes: 13 additions & 22 deletions dpdispatcher/contexts/ssh_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,31 +325,24 @@ def sftp(self):

@staticmethod
def arginfo():
doc_hostname = "hostname or ip of ssh connection."
doc_username = "username of target linux system"
doc_hostname = "Hostname or IP address of the SSH target machine."
doc_username = "Username used to log in to the target system."
doc_password = (
"(deprecated) password of linux system. Please use "
"`SSH keys <https://www.ssh.com/academy/ssh/key>`_ instead to improve security."
)
doc_port = "ssh connection port."
doc_port = "SSH port of the target machine. Usually 22."
doc_key_filename = (
"key filename used by ssh connection. If left None, find key in ~/.ssh or "
"use password for login"
)
doc_passphrase = "passphrase of key used by ssh connection"
doc_timeout = "timeout of ssh connection"
doc_totp_secret = (
"Time-based one time password secret. It should be a base32-encoded string"
" extracted from the 2D code."
)
doc_tar_compress = "The archive will be compressed in upload and download if it is True. If not, compression will be skipped."
doc_look_for_keys = (
"enable searching for discoverable private key files in ~/.ssh/"
)
doc_execute_command = "execute command after ssh connection is established."
doc_proxy_command = (
"ProxyCommand to use for SSH connection through intermediate servers."
"Path to the private key file used for SSH authentication. If left None, DPDispatcher can "
"try discoverable keys in ~/.ssh or fall back to password-based login if configured."
)
doc_passphrase = "Passphrase for the SSH private key, if the key is encrypted."
doc_timeout = "Timeout in seconds for establishing the SSH connection."
doc_totp_secret = "Time-based one-time-password secret used for keyboard-interactive 2FA. It should be a base32-encoded string."
doc_tar_compress = "Whether upload/download tar archives are compressed. Keeping this True usually reduces transfer size at the cost of extra CPU time."
doc_look_for_keys = "Whether to search for discoverable private key files in ~/.ssh when key_filename is not provided."
doc_execute_command = "Optional command executed immediately after the SSH connection is established."
doc_proxy_command = "Optional SSH ProxyCommand used to reach the target through an intermediate host or tunnel."
ssh_remote_profile_args = [
Argument("hostname", str, optional=False, doc=doc_hostname),
Argument("username", str, optional=False, doc=doc_username),
Expand Down Expand Up @@ -1020,9 +1013,7 @@ def machine_subfields(cls) -> List[Argument]:
list[Argument]
machine subfields
"""
doc_remote_profile = (
"The information used to maintain the connection with remote machine."
)
doc_remote_profile = "SSH connection settings for the remote machine, including authentication, timeouts, and optional proxy/jump-host behavior."
remote_profile_format = SSHSession.arginfo()
remote_profile_format.name = "remote_profile"
remote_profile_format.doc = doc_remote_profile
Expand Down
10 changes: 7 additions & 3 deletions dpdispatcher/entrypoints/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,21 +58,25 @@ def submission_args() -> Argument:
"work_base",
dtype=str,
optional=False,
doc="Base directory for the work",
doc=(
"Base directory for the work, relative to machine.local_root. "
"This should normally be a relative path; an absolute path is "
"used as-is and is not combined with machine.local_root."
),
),
Argument(
"forward_common_files",
dtype=List[str],
optional=True,
default=[],
doc="Common files to forward to the remote machine",
doc="Files shared by all tasks and uploaded from work_base before execution.",
),
Argument(
"backward_common_files",
dtype=List[str],
optional=True,
default=[],
doc="Common files to backward from the remote machine",
doc="Files shared by all tasks and downloaded back to work_base after execution.",
),
machine_args,
resources_args,
Expand Down
22 changes: 16 additions & 6 deletions dpdispatcher/machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,17 +407,27 @@ def gen_command_env_cuda_devices(self, resources):
@classmethod
def arginfo(cls):
# TODO: change the possible value of batch and context types after we refactor the code
doc_batch_type = "The batch job system type. Option: " + ", ".join(cls.options)
doc_batch_type = "Batch backend used to execute jobs. Option: " + ", ".join(
cls.options
)
doc_context_type = (
"The connection used to remote machine. Option: "
"Execution context / connection type used to reach the execution environment. Option: "
+ ", ".join(BaseContext.options)
)
doc_local_root = "The dir where the tasks and relating files locate. Typically the project dir."
doc_remote_root = "The dir where the tasks are executed on the remote machine. Only needed when context is not lazy-local."
doc_local_root = (
"Local project root used by DPDispatcher to find task directories and local files. "
"If submission.work_base is a relative path, it is resolved inside this directory; if "
"submission.work_base is absolute, it is used as-is and local_root is ignored."
)
doc_remote_root = (
"Remote root directory used by non-local contexts such as SSH. DPDispatcher creates and uses a "
"submission-specific working directory beneath this root on the remote side. For SSHContext, this path should be absolute."
)
doc_clean_asynchronously = (
"Clean the remote directory asynchronously after the job finishes."
"Clean the remote working directory asynchronously after the job finishes. Avoid enabling this while debugging, "
"because it can remove remote artifacts before you inspect them."
)
doc_retry_count = "Number of retries to resubmit failed jobs."
doc_retry_count = "How many times DPDispatcher will retry a failed job before raising an error."

machine_args = [
Argument("batch_type", str, optional=False, doc=doc_batch_type),
Expand Down
4 changes: 2 additions & 2 deletions dpdispatcher/machines/JH_UniScheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def resources_subfields(cls) -> List[Argument]:
list[Argument]
resources subfields
"""
doc_custom_gpu_line = "Custom GPU configuration, starting with #JSUB"
doc_custom_gpu_line = "Custom GPU header line starting with #JSUB. When set, it overrides the default UniScheduler GPU line generated from gpu_per_node."

return [
Argument(
Expand All @@ -153,7 +153,7 @@ def resources_subfields(cls) -> List[Argument]:
),
],
optional=False,
doc="Extra arguments.",
doc="JH_UniScheduler-specific extra arguments.",
)
]

Expand Down
16 changes: 5 additions & 11 deletions dpdispatcher/machines/lsf.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,16 +162,10 @@ def resources_subfields(cls) -> List[Argument]:
list[Argument]
resources subfields
"""
doc_custom_gpu_line = "Custom GPU configuration, starting with #BSUB"
doc_gpu_usage = "Choosing if GPU is used in the calculation step. "
doc_gpu_new_syntax = (
"For LFS >= 10.1.0.3, new option -gpu for #BSUB could be used. "
"If False, and old syntax would be used."
)
doc_gpu_exclusive = (
"Only take effect when new syntax enabled. "
"Control whether submit tasks in exclusive way for GPU."
)
doc_custom_gpu_line = "Custom GPU header line starting with #BSUB. When set, it overrides the GPU-related LSF header generated from the other GPU kwargs."
doc_gpu_usage = "Whether DPDispatcher should emit an LSF GPU request line at all. If False, no GPU request header is added."
doc_gpu_new_syntax = "Whether to use the newer `#BSUB -gpu` syntax (available in LSF 10.1.0.3 and later) instead of the older resource string syntax."
doc_gpu_exclusive = "Only meaningful when gpu_new_syntax is enabled. Controls whether the submitted job requests GPUs in exclusive mode."
return [
Argument(
"kwargs",
Expand Down Expand Up @@ -207,7 +201,7 @@ def resources_subfields(cls) -> List[Argument]:
),
],
optional=False,
doc="Extra arguments.",
doc="LSF-specific extra arguments.",
)
]

Expand Down
6 changes: 3 additions & 3 deletions dpdispatcher/machines/pbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,8 @@ def resources_subfields(cls) -> List[Argument]:
list[Argument]
resources subfields
"""
doc_pe_name = "The parallel environment name of SGE system."
doc_job_name = "The name of SGE's job."
doc_pe_name = "Parallel environment name used by SGE, for example `mpi`. This controls the `#$ -pe ...` header line in SGE mode."
doc_job_name = "Job name shown by SGE for this submission."

return [
Argument(
Expand All @@ -324,6 +324,6 @@ def resources_subfields(cls) -> List[Argument]:
),
],
optional=False,
doc="Extra arguments.",
doc="SGE-specific extra arguments.",
)
]
6 changes: 3 additions & 3 deletions dpdispatcher/machines/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def resources_subfields(cls) -> List[Argument]:
list[Argument]
resources subfields
"""
doc_custom_gpu_line = "Custom GPU configuration, starting with #SBATCH"
doc_custom_gpu_line = "Custom GPU header line starting with #SBATCH. When set, it overrides DPDispatcher's default Slurm GPU line generated from gpu_per_node."
return [
Argument(
"kwargs",
Expand All @@ -210,7 +210,7 @@ def resources_subfields(cls) -> List[Argument]:
)
],
optional=True,
doc="Extra arguments.",
doc="Slurm-specific extra arguments.",
)
]

Expand Down Expand Up @@ -397,7 +397,7 @@ def resources_subfields(cls) -> List[Argument]:
list[Argument]
resources subfields
"""
doc_slurm_job_size = "Number of tasks in a Slurm job"
doc_slurm_job_size = "For SlurmJobArray, the number of DPDispatcher tasks grouped into each element of the Slurm job array."
arg = super().resources_subfields()[0]
arg.extend_subfields(
[
Expand Down
Loading
Loading