forked from ISUgenomics/common_scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGI2Taxonomy.sh
More file actions
executable file
·36 lines (29 loc) · 939 Bytes
/
GI2Taxonomy.sh
File metadata and controls
executable file
·36 lines (29 loc) · 939 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
# Download nodes.dmp, names.dmp and gi_taxid_nucl.dmp, then set their locations below
# run it as:
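# parallel "bash GI2Taxonomy.sh {}" :::: gi_ids.file
# (bash is required: the script uses [[ ]] and $'\t'; '::::' makes parallel
#  read the GI numbers from the file, whereas ':::' would pass the file name itself)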
#
# Locations of the tab-separated lookup tables. Each defaults to a file in the
# current directory and can be overridden from the environment, e.g.
#   NAMES=/db/names.dmp NODES=/db/nodes.dmp bash GI2Taxonomy.sh 12345
NAMES="${NAMES:-names.dmp}"                      # taxid -> name
NODES="${NODES:-nodes.dmp}"                      # taxid -> parent taxid
GI_TO_TAXID="${GI_TO_TAXID:-gi_taxid_nucl.dmp}"  # GI -> taxid
# Taxonomy path; filled in from the leaf towards the root by the main loop
TAXONOMY=""
# GI number to resolve: first positional argument, empty when omitted
GI="${1:-}"
# Look up a key in a tab-separated table and print the requested field(s) of
# the first matching row. Used to obtain the name corresponding to a taxid or
# the taxid of the parent taxa.
# Arguments:
#   $1 - key; must equal the whole first column of a row (compared literally)
#   $2 - path of the table to search
#   $3 - field selector handed to "cut -f" (e.g. 2 or 3)
# Outputs:
#   the selected field(s) of the first matching row on stdout; nothing when
#   the key is empty or not present
get_name_or_taxid()
{
    local key
    # An empty key would otherwise match any row whose first column is empty
    [[ -n "${1}" ]] || return 0
    # Escape regex metacharacters so that e.g. "1." cannot match the key "12"
    key=$(printf '%s' "${1}" | sed 's/[][\.*^$]/\\&/g')
    # Short options: cut's long --fields spelling is GNU-only. The C locale
    # keeps the scan byte-wise and locale-independent.
    LC_ALL=C grep -m 1 -- "^${key}"$'\t' "${2}" | cut -f "${3}"
}
# Resolve ${GI} to a taxonomy path and print it as "GI<TAB>name;name;...;",
# ordered from the highest taxon reached down to the taxon of the GI itself.
# Diagnostics go to stderr so stdout stays one record per GI.

# Refuse to run without a GI number: an empty key cannot identify a record
if [[ -z "${GI}" ]] ; then
    printf 'Usage: %s GI_NUMBER\n' "${0##*/}" >&2
    exit 1
fi
# Fail early on a missing table instead of silently printing an empty path
for TABLE in "${GI_TO_TAXID}" "${NAMES}" "${NODES}" ; do
    if [[ ! -r "${TABLE}" ]] ; then
        printf '%s: cannot read %s\n' "${0##*/}" "${TABLE}" >&2
        exit 1
    fi
done
# A usable taxid is a plain unsigned integer
TAXID_RE='^[0-9]+$'
# Get the taxid corresponding to the GI number
TAXID=$(get_name_or_taxid "${GI}" "${GI_TO_TAXID}" "2")
# An unknown GI still produces its "GI<TAB>" line, but is reported on stderr
if [[ ! "${TAXID}" =~ ${TAXID_RE} ]] ; then
    printf '%s: no taxid found for GI %s\n' "${0##*/}" "${GI}" >&2
    TAXID=""
fi
# Loop until you reach the root of the taxonomy (i.e. taxid = 1)
while [[ "${TAXID}" =~ ${TAXID_RE} && "${TAXID}" -gt 1 ]] ; do
    # Obtain the scientific name corresponding to a taxid. In the NCBI
    # names.dmp layout a taxid has one row per name (synonym, authority,
    # common name, ...), so pick the row tagged "scientific name" rather
    # than whichever row happens to come first.
    NAME=$(grep -m 1 -- "^${TAXID}"$'\t.*\tscientific name\t' "${NAMES}" | cut -f 3)
    # Fall back to the first row for tables that carry no such tag
    [[ -n "${NAME}" ]] || NAME=$(get_name_or_taxid "${TAXID}" "${NAMES}" "3")
    # Obtain the parent taxa taxid
    PARENT=$(get_name_or_taxid "${TAXID}" "${NODES}" "3")
    # Build the taxonomy path
    TAXONOMY="${NAME};${TAXONOMY}"
    # A missing parent cannot lead to the root, and a taxid that is its own
    # parent would loop forever: stop and keep the partial path
    if [[ -z "${PARENT}" || "${PARENT}" == "${TAXID}" ]] ; then
        printf '%s: lineage of GI %s stops at taxid %s\n' \
            "${0##*/}" "${GI}" "${TAXID}" >&2
        break
    fi
    TAXID="${PARENT}"
done
# printf keeps backslashes in the data literal, unlike "echo -e"
printf '%s\t%s\n' "${GI}" "${TAXONOMY}"
exit 0