Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion chunk/src/chunker/chunker_parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ void chunker::declare_options() {
opt_base.add_options()
("help", "Produces help message")
("seed", bpo::value<long int>()->default_value(15052011), "Seed of the random number generator")
("threads", bpo::value<long int>()->default_value(1), "Number of threads");
("threads,T", bpo::value<long int>()->default_value(1), "Number of threads");

bpo::options_description opt_input ("Input parameters");
opt_input.add_options()
Expand Down
8 changes: 4 additions & 4 deletions concordance/src/checker/checker_parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ void checker::declare_options() {
opt_base.add_options()
("help", "Produces help message")
("seed", bpo::value<int>()->default_value(15052011), "Seed of the random number generator")
("threads", bpo::value<int>()->default_value(1), "Number of threads");
("threads,T", bpo::value<int>()->default_value(1), "Number of threads");

bpo::options_description opt_input ("Input parameters");
opt_input.add_options()
Expand All @@ -45,11 +45,11 @@ void checker::declare_options() {
opt_algo.add_options()
("af-tag", bpo::value< std::string >()->default_value("AF"), "Allele frequency INFO tag to use for binning. By default the allele frequency is estimated from the INFO/AF tag.")
("use-alt-af", "If specified, the metrics work on the ALT allele frequency (range [0,1]), rather than minor allele frequency (range [0,0.5]).")
("bins", bpo::value< std::vector < double > >()->multitoken(), "Allele frequency bins used for rsquared computations. By default they should as MAF bins [0-0.5], while they should take the full range [0-1] if --use-ref-alt is used.")
("bins", bpo::value< std::vector < double > >()->multitoken(), "Allele frequency bins used for rsquared computations. By default they should as MAF bins [0-0.5], while they should take the full range [0-1] if --use-alt-af is used.")
("ac-bins", bpo::value< std::vector < int > >()->multitoken(), "User-defined allele count bins used for rsquared computations.")
("allele-counts", "Default allele count bins used for rsquared computations. AN field must be defined in the frequency file.")
("min-val-gl", bpo::value<double>(), "Minimum genotype likelihood probability P(G|R) in validation data [set to zero to have no filter of if using --gt-validation]")
("min-val-dp", bpo::value<int>(), "Minimum coverage in validation data. If FORMAT/DP is missing and --minDP > 0, the program exits with an error. [set to zero to have no filter of if using --gt-validation]")
("min-val-gl", bpo::value<double>(), "Minimum genotype likelihood probability P(G|R) in validation data [set to zero to have no filter if using --gt-val]")
("min-val-dp", bpo::value<int>(), "Minimum coverage in validation data. If FORMAT/DP is missing and --min-val-dp > 0, the program exits with an error. [set to zero to have no filter if using --gt-val]")
("min-tar-gp", bpo::value< std::vector < float > >()->multitoken(), "Minimum GP probabilities to be used as a filter. By default it looks at the GP field to specify the filter, but will try to use FORMAT/PL if gt-tar option is specified. Leave empty if no filter is used.")
("out-r2-per-site", "Output r2 at each site.")
("out-rej-sites", "Output sites where that cannot be used for the concordance.")
Expand Down
10 changes: 5 additions & 5 deletions docs/docs/documentation/chunk.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ GLIMPSE2_chunk --input file_chr20.bcf --map chr20.b38.gmap.gz --region chr20 --s

| Option name | Argument| Default | Description |
|:---------------------|:--------|:---------|:-------------------------------------|
| \-\-window-cm | FLOAT | 4.0 | Minimal window size in cM |
| \-\-window-mb | FLOAT | 4.0 | Minimal window size in Mb |
| \-\-window-count | INT | 30000 | Minimal window size in #variants |
| \-\-window-cm | FLOAT | 2.5 | Minimal window size in cM |
| \-\-window-mb | FLOAT | 2.0 | Minimal window size in Mb |
| \-\-window-count | INT | 20000 | Minimal window size in #variants |
| \-\-buffer-cm | FLOAT | 0.5 | Minimal buffer size in cM |
| \-\-buffer-mb | FLOAT | 0.5 | Minimal buffer size in Mb |
| \-\-buffer-count | INT | 3000 | Minimal buffer size in #variants |
| \-\-buffer-mb | FLOAT | 0.4 | Minimal buffer size in Mb |
| \-\-buffer-count | INT | 2000 | Minimal buffer size in #variants |

#### Model Parameters

Expand Down
8 changes: 4 additions & 4 deletions docs/docs/documentation/concordance.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ GLIMPSE2_concordance --gt-val --ac-bins 1 5 10 20 50 100 200 500 1000 2000 5000
| Option name | Argument| Default | Description |
|:---------------------|:--------|:---------|:-------------------------------------|
| \-\-input | FILE | NA | File with four columns listing in order: regions frequencies validation and imputed dataset. For genome-wide concordance, add more lines specifying different chromosomes. |
| \-\-samples | NA | NA | List of samples to process, one sample ID per line. |
| \-\-samples | FILE | NA | List of samples to process, one sample ID per line. |
| \-\-gt-val | NA | NA | Uses hard-called genotypes rather than phred-scaled likelihoods for the validation dataset, reading them from the FORMAT/GT field. |
| \-\-gt-tar | NA | NA | Uses FORMAT/GT field to determine the best-guess genotype rather than the FORMAT/GP (default). FORMAT/DS and FORMAT/GP fields are still required for calibration and rsquared calculations. |

Expand All @@ -57,11 +57,11 @@ GLIMPSE2_concordance --gt-val --ac-bins 1 5 10 20 50 100 200 500 1000 2000 5000
|:--------------------|:--------|:----------|:-------------------------------------|
| \-\-af-tag | STRING | AF | Allele frequency INFO tag to use for binning. By default the allele frequency is estimated from the INFO/AF tag. |
| \-\-use-alt-af | NA | NA | If specified, the metrics work on the ALT allele frequency (range \[0,1\]), rather than minor allele frequency (range \[0,0.5\]). |
| \-\-bins | VECTOR | NA | Allele frequency bins used for rsquared computations. By default they should as MAF bins \[0-0.5\], while they should take the full range \[0-1\] if --use-ref-alt is used. |
| \-\-bins | VECTOR | NA | Allele frequency bins used for rsquared computations. By default they should be given as MAF bins \[0-0.5\], while they should take the full range \[0-1\] if --use-alt-af is used. |
| \-\-ac-bins | VECTOR | NA | User-defined allele count bins used for rsquared computations. |
| \-\-allele-counts | NA | NA | Default allele count bins used for rsquared computations. AN field must be defined in the frequency file. |
| \-\-min-val-gl | FLOAT | NA | Minimum genotype likelihood probability P(G\|R) in validation data \[set to zero to have no filter of if using --gt-validation\] |
| \-\-min-val-dp | INT | NA | Minimum coverage in validation data. If FORMAT/DP is missing and --minDP > 0, the program exits with an error. \[set to zero to have no filter of if using --gt-validation\] |
| \-\-min-val-gl | FLOAT | NA | Minimum genotype likelihood probability P(G\|R) in validation data \[set to zero to have no filter if using --gt-val\] |
| \-\-min-val-dp | INT | NA | Minimum coverage in validation data. If FORMAT/DP is missing and --min-val-dp > 0, the program exits with an error. \[set to zero to have no filter if using --gt-val\] |
| \-\-min-tar-gp | VECTOR | NA | Minimum GP probabilities to be used as a filter. By default it looks at the GP field to specify the filter, but will try to use FORMAT/PL if gt-tar option is specified. Leave empty if no filter is used. |
| \-\-out-r2-per-site | NA | NA | Output r2 at each site. |
| \-\-out-rej-sites | NA | NA | Output sites that cannot be used for the concordance. |
Expand Down
3 changes: 1 addition & 2 deletions docs/docs/documentation/ligate.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ GLIMPSE2_ligate --input list_imputed_files_chr20.txt --output ligated_chr20.bcf
|:---------------------|:--------|:---------|:-------------------------------------|
| \-\-help | NA | NA | Produces help message |
| \-\-seed | INT | 15052011 | Seed of the random number generator |
| \-T \[ \-\-thread \] | INT | 1 | Number of threads |
| \-T \[ \-\-threads \]| INT | 1 | Number of threads |

#### Input files

Expand All @@ -53,6 +53,5 @@ GLIMPSE2_ligate --input list_imputed_files_chr20.txt --output ligated_chr20.bcf
| Option name | Argument| Default | Description |
|:---------------------|:--------|:---------|:-------------------------------------|
| \-O \[\-\-output \] | STRING | NA | Output ligated (phased) file in VCF/BCF format |
| \-\-no-index | STRING | NA | If specified, the ligated VCF/BCF is not indexed by GLIMPSE2 for random access to genomic regions |
| \-\-log | STRING | NA | Log file |

25 changes: 14 additions & 11 deletions docs/docs/documentation/phase.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,27 +57,29 @@ GLIMPSE2_phase --bam-list bams_1.0x.txt --reference binary_reference_panel_chr20
| \-\-keep-monomorphic-ref-sites | NA | NA | **Expert setting.** Keeps monomorphic markers in the reference panel (removed by default) |
| \-\-impute-reference-only-variants | NA | NA | Only used together with \-\-input-gl. Allows imputation at variants only present in the reference panel (no GL called at these positions). The use of this option is intended only to allow imputation at sporadic missing variants. If the number of missing variants is non-sporadic, please re-run the genotype likelihood computation at all reference variants and avoid using this option, since data from the reads should be used. A warning is thrown if reference-only variants are found.|
| \-\-input-field-gl | NA | NA | Only used together with \-\-input-gl. Use FORMAT/GL field instead of FORMAT/PL to read genotype likelihoods |
| \-\-use-gl-indels | NA | NA | **Expert setting.** Only used together with \-\-input-gl. Use genotype likelihoods at indels from the VCF/BCF file. By default GLIMPSE assumes flat likelihoods at non-SNP variants, as genotype likelihoods from low-coverage data are often miscalibrated, potentially affecting neighbouring variants. |
| \-\-checkpoint-file-in | FILE | NA | File to read in checkpoint from |

#### Model parameters

| Option name | Argument| Default | Description |
|:--------------------|:--------|:----------|:-------------------------------------|
| \-\-burn-in | INT | 5 | **Expert setting.** Number of burn-in iterations of the Gibbs sampler
| \-\-main | INT | 15 | **Expert setting.** Number of main iterations of the Gibbs sampler
| \-\-ne | INT | 100000 | **Expert setting.** Effective diploid population size modelling recombination frequency
| \-\-min-gl | FLOAT | 1e-10 | **Expert setting.** Minimim haploid likelihood
| \-\-err-imp | FLOAT | 1e-12 | **Expert setting.** Imputation HMM error rate
| \-\-err-phase | FLOAT | 1e-4 | **Expert setting.** Phasing HMM error rate
| \-\-burnin | INT | 5 | **Expert setting.** Number of burn-in iterations of the Gibbs sampler |
| \-\-main | INT | 15 | **Expert setting.** Number of main iterations of the Gibbs sampler |
| \-\-ne | INT | 100000 | **Expert setting.** Effective diploid population size modelling recombination frequency |
| \-\-min-gl | FLOAT | 1e-10 | **Expert setting.** Minimum haploid likelihood |
| \-\-err-imp | FLOAT | 1e-12 | **Expert setting.** Imputation HMM error rate |
| \-\-err-phase | FLOAT | 1e-4 | **Expert setting.** Phasing HMM error rate |

#### Selection parameters

| Option name | Argument| Default | Description |
|:--------------------|:--------|:----------|:-------------------------------------|
| \-\-pbwt-depth | INT | 12 | **Expert setting.** Number of neighbors in the sparse PBWT selection step (positive number).
| \-\-pbwt-modulo-cm | FLOAT | 5 | **Expert setting.** Frequency of PBWT selection in cM (positive number). This parameter is automatically adjusted in case of small imputation regions.
| \-\-Kinit | INT | 1000 | **Expert setting.** Number of states used for initialization (positive number). Can be set to zero only when --state-list is set, to skip the selection for the initialization step.
| \-\-Kpbwt | INT | 2000 | **Expert setting.** Maximum number of states selected from the sparse PBWT (positive number). Can be set to zero only when --state-list is set, to skip the selection for during the Gibbs iterations.
| \-\-state-list | FILE | 5 | **Expert setting.** List (.txt file) of haplotypes always present in the conditioning set, independent from state selection. Not affected by other selection parameters. Each row is a target haplotype (two lines per sample in case of diploid individuals) each column is a space separated list of reference haplotypes (in numerical order 0-(2N-1) ). Useful when prior knowledge of relatedness between the reference and target panel is known a priori.
| \-\-pbwt-depth | INT | 12 | **Expert setting.** Number of neighbors in the sparse PBWT selection step (positive number). |
| \-\-pbwt-modulo-cm | FLOAT | 0.1 | **Expert setting.** Frequency of PBWT selection in cM (positive number). This parameter is automatically adjusted in case of small imputation regions. |
| \-\-Kinit | INT | 1000 | **Expert setting.** Number of states used for initialization (positive number). Can be set to zero only when --state-list is set, to skip the selection for the initialization step. |
| \-\-Kpbwt | INT | 2000 | **Expert setting.** Maximum number of states selected from the sparse PBWT (positive number). Can be set to zero only when --state-list is set, to skip the selection during the Gibbs iterations. |
| \-\-state-list | FILE | NA | **Expert setting.** List (.txt file) of haplotypes always present in the conditioning set, independent from state selection. Not affected by other selection parameters. Each row is a target haplotype (two lines per sample in case of diploid individuals) each column is a space separated list of reference haplotypes (in numerical order 0-(2N-1) ). Useful when relatedness between the reference and target panel is known a priori. |

#### BAM/CRAM options and filters

Expand Down Expand Up @@ -105,5 +107,6 @@ GLIMPSE2_phase --bam-list bams_1.0x.txt --reference binary_reference_panel_chr20
| \-\-bgen-bits | INT | 8 | **Expert setting.** Only used when the output is in BGEN file format. Specifies the number of bits to be used for the encoding probabilities of the output BGEN file. If the output is in the .vcf\[.gz\]/.bcf format, this value is ignored. Accepted values: 1-32. |
| \-\-bgen-compr | STRING | zstd | **Expert setting.** Only used when the output is in BGEN file format. Specifies the compression of the output BGEN file. If the output is in the .vcf\[.gz\]/.bcf format, this value is ignored. Accepted values: \[no,zlib,zstd\] |
| \-\-log | FILE | NA | Log file |
| \-\-checkpoint-file-out | FILE | NA | File to save checkpoint info in |


2 changes: 1 addition & 1 deletion docs/docs/documentation/split_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ GLIMPSE2_split_reference --input-region chr20:7702567-12266861 --output-region c
|:---------------------|:--------|:---------|:-------------------------------------|
| \-\-help | NA | NA | Produces help message |
| \-\-seed | INT | 15052011 | Seed of the random number generator |
| \-T \[ \-\-threads\] | INT | 1 | Number of threads |
| \-T \[ \-\-threads \]| INT | 1 | Number of threads |



Expand Down
4 changes: 2 additions & 2 deletions ligate/src/ligater/ligater_parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ void ligater::declare_options() {
opt_base.add_options()
("help", "Produces help message")
("seed", bpo::value<int>()->default_value(15052011), "Seed of the random number generator")
("threads", bpo::value<int>()->default_value(1), "Number of threads");
("threads,T", bpo::value<int>()->default_value(1), "Number of threads");

bpo::options_description opt_input ("Input files");
opt_input.add_options()
("input", bpo::value < std::string >(), "Text file containing all VCF/BCF to ligate, one file per line");
("input,I", bpo::value < std::string >(), "Text file containing all VCF/BCF to ligate, one file per line");

bpo::options_description opt_output ("Output files");
opt_output.add_options()
Expand Down
2 changes: 1 addition & 1 deletion phase/src/caller/caller_parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ void caller::declare_options() {
opt_base.add_options()
("help", "Produces help message")
("seed", bpo::value<int>()->default_value(15052011), "Seed of the random number generator")
("threads", bpo::value<int>()->default_value(1), "Number of threads");
("threads,T", bpo::value<int>()->default_value(1), "Number of threads");

bpo::options_description opt_input ("Input parameters");
opt_input.add_options()
Expand Down
2 changes: 1 addition & 1 deletion split_reference/src/caller/caller_parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ void caller::declare_options() {
opt_base.add_options()
("help", "Produces help message")
("seed", bpo::value<int>()->default_value(15052011), "Seed of the random number generator")
("threads", bpo::value<int>()->default_value(1), "Number of threads");
("threads,T", bpo::value<int>()->default_value(1), "Number of threads");

bpo::options_description opt_input ("Input parameters");
opt_input.add_options()
Expand Down