zavolanlab · deliaBlue · Mar 1, 2026 · Mar 1, 2026 · Mar 1, 2026 · Mar 1, 2026
diff --git a/README.md b/README.md
@@ -435,6 +435,10 @@ Finally, to visualize the distribution of read alignments around miRNA
 loci, ASCII-style alignment pileups are optionally generated for user-defined
 regions of interest.
 
+> **NOTE:** Whenever an alignment contains an insertion, the inserted bases are
+> removed from the read sequence. The resulting sequence is counted towards the
+> read with an identical sequence, or displayed on its own, depending on whether
+> alignments are collapsed or not.
 
 The schema below is a visual representation of the individual workflow steps
 and how they are related:

diff --git a/config/config_schema.json b/config/config_schema.json
@@ -121,6 +121,11 @@
             },
             "default": {},
             "description": "Dictionary of arbitrary condition names (keys) and library names to aggregate alignment pileups for (values; MUST correspond to names in samples table)."
+        },
+        "sort_by":{
+            "type": "string",
+            "default": "position",
+            "description":"Sort pileups by the first nucleotide's position from left-to-right ('position') or by counts in descending order ('counts')."
         }
     }
 }
diff --git a/config/config_template.yaml b/config/config_template.yaml
@@ -76,4 +76,10 @@ mir_list: ['isomir', 'mirna', 'pri-mir']
 #
 # Leave as an empty dictionary if no pileups are desired.
 lib_dict: {}
+
+# sorting type
+# ASCII-style alignment pileups can be sorted by the first nucleotide's
+# position from left-to-right ('position') or by counts in descending order
+# ('counts')
+sort_by: 'position'
 ...
diff --git a/docs/api/ascii_alignment_pileup.R.md b/docs/api/ascii_alignment_pileup.R.md
@@ -40,6 +40,25 @@ than `--minimum-count` are filtered out.
     identical sequence, or displayed on its own if the option
     `--do-not-collapse-alignments` is used.
 
+**SORTING**
+
+ASCII-style alignment pileups' final arrangement is controlled by the option
+`--sort-by` and the flag `--reverse-sort`.
+
+By default, pileups are sorted by the alignment counts in descending order. If
+two alignments have the same count, they are sorted from left to right by
+their first and then by their last nucleotide position.
+
+Use `--sort-by="position"` to arrange the alignments by their first
+nucleotide's position (from-left-to-right). If more than one alignment starts
+at the same location, these are additionally sorted by counts
+(descending order), and by their last nucleotide's position
+(from-left-to-right).
+
+The flag `--reverse-sort` inverts every sort key, including the tie-breakers:
+for both sort types, positions are then sorted from right to left and counts
+in ascending order.
+
 **EXPECTED OUTPUT**
 
 For each BED interval, one TSV is written to `--output-directory`:
@@ -126,10 +145,14 @@ Options
   in alignments. (default: "-")
 - <b>`--prefix=STRING`</b>: Prefix to be used in the output file name(s). If
   not provided the input BAM file(s) name will be used instead.
+- <b>`--sort-by=STRING`</b>: Specify the sort type (either "position" or
+  "counts"). (default: "counts")
+- <b>`--reverse-sort`</b>: Reverse the sort order of every sort key, including
+  tie-breakers (positions from right to left, counts in ascending order),
+  regardless of the sort type.
 - <b>`-h`</b> | <b>`--help`</b>: Show this information and die.
 - <b>`-v`</b> | <b>`--verbose`</b>: Print log messages to `STDOUT`.
 
-
 Dependencies
 ------------
 

diff --git a/docs/workflow/modules/pileups.md b/docs/workflow/modules/pileups.md
@@ -109,6 +109,13 @@ libraries with [**ASCII-style alignment pileups**](pileups.md#third-party-softwa
     - Annotated genomic regions (`.bed`); from workflow input files or
     [**create_empty_bed**](pileups.md#create_empty_bed)
 
+=== "Parameters"
+
+    - **config_template.yaml**
+        - `sort_by`: ASCII-style alignment pileups can be sorted by the first
+        nucleotide's position from left-to-right ('position') or by counts in
+        descending order ('counts') (default: 'position')
+
 === "Output"
 
     (**Workflow output**) Empty text file (`.txt`)
@@ -134,6 +141,13 @@ run with [**ASCII-style alignment pileups**](pileups.md#third-party-software-use
     - Annotated genomic regions (`.bed`); from workflow input files or
     [**create_empty_bed**](pileups.md#create_empty_bed)
 
+=== "Parameters"
+
+    - **config_template.yaml**
+        - `sort_by`: ASCII-style alignment pileups can be sorted by the first
+        nucleotide's position from left-to-right ('position') or by counts in
+        descending order ('counts') (default: 'position')
+
 === "Output"
 
     (**Workflow output**) Empty text file (`.txt`)
@@ -169,6 +183,9 @@ different library subsets if provided with
         - `lib_dict`: Dictionary of arbitrary condition names (keys) and library
         names to aggregate alignment pileups for (values; MUST correspond to names
         in samples table) (default: None)
+        - `sort_by`: ASCII-style alignment pileups can be sorted by the first
+        nucleotide's position from left-to-right ('position') or by counts in
+        descending order ('counts') (default: 'position')
 
 === "Output"
 

diff --git a/docs/workflow/overview.md b/docs/workflow/overview.md
@@ -145,6 +145,13 @@ Finally, to visualize the distribution of read alignments around miRNA loci,
 ASCII-style alignment pileups are optionally generated for user-defined regions
 of interest.
 
+!!! warning "Alignment insertions"
+
+    Whenever an alignment contains an insertion, the inserted bases are removed
+    from the read sequence. The resulting sequence is counted towards the read
+    with an identical sequence, or displayed on its own, depending on whether
+    alignments are collapsed or not.
+
 ```console
 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>	test-mir
 ....>>>>>>>>>>>>>>>>>>>>>>.....................................................	test-mir-5p

diff --git a/pipeline_documentation.md b/pipeline_documentation.md
@@ -1890,6 +1890,11 @@ libraries with [**ASCII-style alignment pileups**](#third-party-software-used).
   [**index_uncollapsed_reads_bam**](#index_uncollapsed_reads_bam)
   - Annotated genomic regions (`.bed`); from workflow input files or
   [**create_empty_bed**](#create_empty_bed)
+- **Parameters**
+  - **config_template.yaml**
+    - `sort_by`: ASCII-style alignment pileups can be sorted by the first
+    nucleotide's position from left-to-right ('position') or by counts in
+    descending order ('counts') (default: 'position')
 - **Output**
   - (**Workflow output**) Empty text file (`.txt`)
 
@@ -1913,6 +1918,11 @@ run with [**ASCII-style alignment pileups**](#third-party-software-used).
   [**index_uncollapsed_reads_bam**](#index_uncollapsed_reads_bam)
   - Annotated genomic regions (`.bed`); from workflow input files or
   [**create_empty_bed**](#create_empty_bed)
+- **Parameters**
+  - **config_template.yaml**
+    - `sort_by`: ASCII-style alignment pileups can be sorted by the first
+    nucleotide's position from left-to-right ('position') or by counts in
+    descending order ('counts') (default: 'position')
 - **Output**
   - (**Workflow output**) Empty text file (`.txt`)
 
@@ -1944,6 +1954,9 @@ different library subsets if provided with
     - `lib_dict`: Dictionary of arbitrary condition names (keys) and library
     names to aggregate alignment pileups for (values; MUST correspond to names
     in samples table) (default: None)
+    - `sort_by`: ASCII-style alignment pileups can be sorted by the first
+    nucleotide's position from left-to-right ('position') or by counts in
+    descending order ('counts') (default: 'position')
 - **Output**
   - Empty text file (`.txt`)
 

diff --git a/test/test_files/config.yaml b/test/test_files/config.yaml
@@ -6,3 +6,4 @@ mirna_file: test/test_files/mirna_annotations.gff3
 map_chr_file: test/test_files/ucsc_to_ensembl.tsv
 mir_list: ['isomir', 'mirna', 'pri-mir']
 lib_dict: {"group_A": ['test_lib', 'test_lib']}
+sort_by: 'position'
diff --git a/test/test_files/config_lint.yaml b/test/test_files/config_lint.yaml
@@ -1,6 +1,7 @@
-samples: test/test_files/samples_table.tsv  
+samples: test/test_files/samples_table.tsv
 genome_file: test/test_files/genome.fa.gz
 gtf_file: test/test_files/gene_annotations.gtf.gz
 mirna_file: test/test_files/mirna_annotations.gff3
 map_chr_file: test/test_files/ucsc_to_ensembl.tsv
 mir_list: ['isomir', 'mirna', 'pri-mir']
+sort_by: 'position'
diff --git a/workflow/rules/pileup.smk b/workflow/rules/pileup.smk
@@ -144,6 +144,7 @@ rule create_per_library_ascii_pileups:
             PILEUP_DIR / "{sample}", sample=[wildcards.sample]
         ),
         prefix="{sample}",
+        sort=config["sort_by"],
     log:
         LOCAL_LOG / "pileups_{sample}.log",
     container:
@@ -155,6 +156,7 @@ rule create_per_library_ascii_pileups:
         --verbose \
         --annotations={input.annotations} \
         --reference={input.reference} \
+        --sort-by={params.sort} \
         --prefix={params.prefix} \
         --output-directory {params.out_dir} \
         {input.regions} \
@@ -191,6 +193,7 @@ rule create_per_run_ascii_pileups:
         cluster_log=CLUSTER_LOG / "pileups_whole_run.log",
         out_dir=PILEUP_DIR / "all",
         prefix="all_samples",
+        sort=config["sort_by"],
     resources:
         mem=16,
     log:
@@ -205,6 +208,7 @@ rule create_per_run_ascii_pileups:
         --annotations={input.annotations} \
         --reference={input.reference} \
         --prefix={params.prefix} \
+        --sort-by={params.sort} \
         --output-directory {params.out_dir} \
         {input.regions} \
         {input.maps} \
@@ -244,6 +248,7 @@ if config["lib_dict"] != None:
                 PILEUP_DIR / "{condition}", condition=wildcards.condition
             ),
             prefix="{condition}",
+            sort=config["sort_by"],
         log:
             LOCAL_LOG / "pileups_condition_{condition}.log",
         container:
@@ -256,6 +261,7 @@ if config["lib_dict"] != None:
             --annotations={input.annotations} \
             --reference={input.reference} \
             --prefix={params.prefix} \
+            --sort-by={params.sort} \
             --output-directory {params.out_dir} \
             {input.regions} \
             {input.maps} \

diff --git a/workflow/scripts/ascii_alignment_pileup.R b/workflow/scripts/ascii_alignment_pileup.R
@@ -29,7 +29,7 @@ against one or more regions specified in a BED file.\n"
 author <- "Author: Alexander Kanitz"
 affiliation <- "Affiliation: Biozentrum, University of Basel"
 email <- "Email: alexander.kanitz@alumni.ethz.ch"
-version <- "1.2.1"
+version <- "1.3.0"
 version_formatted <- paste("Version:", version, sep=" ")
 requirements <- c("optparse", "rtracklayer", "GenomicAlignments", "tools")
 requirements_txt <- paste("Requires:", paste(requirements, collapse=", "), sep=" ")
@@ -133,6 +133,24 @@ option_list <- list(
             the input BAM file(s) name will be used instead.",
             metavar="string"
         ),
+        make_option(
+          "--sort-by",
+          action="store",
+          type="character",
+          default="counts",
+          help="Specify the sort type (either \"position\" or \"counts\"). 
+          [default \"%default\"]",
+          metavar="string"
+        ),
+        make_option(
+          "--reverse-sort",
+          action="store_true",
+          type="logical",
+          default=FALSE,
+          help="Reverse the sort order (from \"from-left-to-right\" to 
+          \"from-right-to-left\" for sort type \"position\" and from 
+          \"descending\" to \"ascending\" for sort type \"counts\")"
+        ),
         make_option(
             c("-h", "--help"),
             action="store_true",
@@ -170,6 +188,8 @@ count.min <- cli$options[["minimum-count"]]
 char.pad <- cli$options[["padding-character"]]
 char.indel <- cli$options[["indel-character"]]
 field.name.anno <- cli$options[["annotation-name-field"]]
+sort.by <- cli$options[["sort-by"]]
+rev.sort <- cli$options[["reverse-sort"]]
 verb <- cli$options[["verbose"]]
 #==========================#
 #    PRE-REQUISITES END    #
@@ -263,15 +283,24 @@ for(index in seq_along(bed)) {
     if (as.character(strand(region))[[1]] == "-") {
         df[["seq"]] <- reverse(df[["seq"]])
     }
-    # Sort by position of first nucleotide, count and position of last nucleotide
     if (nrow(df)) {
         last_char <- nchar(df[["seq"]][[1]])
         pos.nuc.first <- regexpr(paste0("[^", char.pad, "\\.]"), df[["seq"]])
         pos.nuc.last <- last_char - regexpr(paste0("[^", char.pad, "\\.]"), unlist(lapply(df[["seq"]], reverse))) + 1
-        df <- df[order(
+
+        # Sort by pos. of first nucleotide, count and pos. of last nucleotide
+        if ( sort.by == "position" ) {
+          df <- df[order(
             pos.nuc.first, df[["count"]], pos.nuc.last,
-            decreasing=c(FALSE, TRUE, FALSE)
-        ), ]
+            decreasing = c( rev.sort, !rev.sort, rev.sort )
+          ), ]
+        # Sort by count, pos. of first nucleotide, and pos. of last nucleotide
+        } else {
+          df <- df[order(
+            df[["count"]], pos.nuc.first, pos.nuc.last,
+            decreasing = c( !rev.sort, rev.sort, rev.sort )
+          ), ]
+        }
     }
     # Reverse sequence again if on minus strand
     if (as.character(strand(region))[[1]] == "-") {
@@ -352,4 +381,4 @@ if (verb) cat("Done.\n\nSession info:\n")
 if (verb) print(sessionInfo())
 #================#
 #    MAIN END    #
-#================#
+#================#