-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCreateMetadataFile.wdl
More file actions
94 lines (81 loc) · 2.48 KB
/
Copy pathCreateMetadataFile.wdl
File metadata and controls
94 lines (81 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
version 1.0
import "../utils/Helpers.wdl"
import "../utils/Structs.wdl"
# Workflow wrapper around the CreateMetadata task.
#
# Inputs:
#   ped_file              - pedigree file forwarded to the task
#   ancestry_file         - ancestry table forwarded to the task
#   prefix                - prefix used by the task to name the output file
#   utils_docker          - container image the task runs in
#   runtime_attr_override - optional overrides for the task's default resources
#
# Output:
#   metadata - the metadata file produced by the CreateMetadata task
workflow CreateMetadataFile {
  input {
    File ped_file
    File ancestry_file
    String prefix
    String utils_docker
    RuntimeAttr? runtime_attr_override
  }

  # Every workflow input is handed straight to the task; only the docker
  # image is renamed (utils_docker -> docker) on the way through.
  call CreateMetadata {
    input:
      prefix = prefix,
      ped_file = ped_file,
      ancestry_file = ancestry_file,
      runtime_attr_override = runtime_attr_override,
      docker = utils_docker
  }

  output {
    File metadata = CreateMetadata.metadata_file
  }
}
# Builds "<prefix>.metadata.tsv", a three-column table (SampleId, Sex,
# Population) with one row per sample found in the pedigree file.
#
# Inputs:
#   ped_file              - tab-separated pedigree file; the 2nd column is read
#                           as the sample ID and the 5th column as the sex code
#                           ("1" -> male, "2" -> female, anything else -> unknown)
#   ancestry_file         - tab-separated table; the 1st column is the sample ID
#                           and the 2nd column is the population label
#   prefix                - prefix for the output file name
#   docker                - container image; must provide python3
#   runtime_attr_override - optional overrides for the default resources below
#
# Output:
#   metadata_file - "<prefix>.metadata.tsv"
#
# Parsing rules for both input files: blank lines and lines starting with "#"
# are skipped, fields are whitespace-trimmed individually (so an empty leading
# column does not shift the column indices), and a row with too few columns
# aborts the task with an error naming the file and line number.
task CreateMetadata {
  input {
    File ped_file
    File ancestry_file
    String prefix
    String docker
    RuntimeAttr? runtime_attr_override
  }

  # Resources used unless runtime_attr_override supplies a value.
  RuntimeAttr default_attr = object {
    cpu_cores: 1,
    mem_gb: 4,
    disk_gb: 10,
    boot_disk_gb: 10,
    preemptible_tries: 2,
    max_retries: 0
  }
  RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])

  command <<<
    set -euo pipefail
    # The heredoc delimiter is quoted so bash passes the script to python
    # verbatim (no expansion of dollar signs or backticks).
    python3 <<'CODE'
    def read_rows(path, min_cols):
        """Yield the trimmed tab-separated fields of every data line in path."""
        with open(path) as handle:
            for line_no, raw in enumerate(handle, start=1):
                line = raw.rstrip("\r\n")
                # Skip blank lines and '#'-prefixed header/comment lines.
                if not line.strip() or line.startswith("#"):
                    continue
                fields = [field.strip() for field in line.split("\t")]
                if len(fields) < min_cols:
                    raise ValueError(
                        f"{path}:{line_no}: expected at least {min_cols} "
                        f"tab-separated columns, found {len(fields)}"
                    )
                yield fields
    # PED sex codes: 1 = male, 2 = female; every other value is reported as unknown.
    SEX_LABELS = {"1": "male", "2": "female"}
    sex_map = {}
    for fields in read_rows("~{ped_file}", 5):
        sex_map[fields[1]] = SEX_LABELS.get(fields[4], "unknown")
    pop_map = {}
    for fields in read_rows("~{ancestry_file}", 2):
        pop_map[fields[0]] = fields[1]
    # One output row per pedigree sample, in pedigree order; samples missing
    # from the ancestry table get population "unknown".
    with open("~{prefix}.metadata.tsv", "w") as out:
        out.write("SampleId\tSex\tPopulation\n")
        for sample_id, sex in sex_map.items():
            population = pop_map.get(sample_id, "unknown")
            out.write(f"{sample_id}\t{sex}\t{population}\n")
    CODE
  >>>

  output {
    File metadata_file = "~{prefix}.metadata.tsv"
  }

  runtime {
    cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
    memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
    disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
    bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
    docker: docker
    preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
    maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
  }
}