From 43be5ac6b70a6fbd14cfa741540b3a7169837f1a Mon Sep 17 00:00:00 2001 From: deliaBlue <103108590+deliaBlue@users.noreply.github.com> Date: Mon, 1 Jan 2024 16:44:39 +0100 Subject: [PATCH] fix: suffix nomenclature for SAM uncollapsing (#130) * fix: account for additional collapsing suffixes * test: update expected output --- scripts/sam_uncollapse.pl | 11 ++++++----- test/expected_output.md5 | 8 ++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/scripts/sam_uncollapse.pl b/scripts/sam_uncollapse.pl index e876b0de..3f613f83 100755 --- a/scripts/sam_uncollapse.pl +++ b/scripts/sam_uncollapse.pl @@ -92,7 +92,7 @@ sub usage { Comments: CAUTION: Only marginal validation of the input file type/format performed! -Version 1.2 (2014-08-26) +Version 1.3 (2023-12-24) Written by Alexander Kanitz on 2013-11-21 '; } @@ -128,15 +128,16 @@ sub sam_uncollapse { # Get QNAME my ($id, $rest) = split /\t/, $line, 2; # Find and remove appended copy number n - $id =~ s/-(\d+)\Z//; + $id =~ /^([^_-]+)-(\d+)/; # Write appended copy number n to variable - my $repeat = $1; + my $read = $1; + my $repeat = $2; # If --suffix option is set... if ($suffix) { # Iterate over number of identical reads/alignments for my $suffix (1..$repeat) { # Recreate line with suffix - $line = join "\t", "$id.$suffix", $rest; + $line = join "\t", "$read.$suffix", $rest; # Print line print OUT $line; } @@ -145,7 +146,7 @@ sub sam_uncollapse { # Else... else { # Recreate line - $line = join "\t", $id, $rest; + $line = join "\t", $read, $rest; # Print line n times print OUT $line x $repeat; } diff --git a/test/expected_output.md5 b/test/expected_output.md5 index c3fc05e6..5f026bca 100644 --- a/test/expected_output.md5 +++ b/test/expected_output.md5 @@ -1,7 +1,7 @@ 68f943f89b52d628851dd97fb1399d68 results/TABLES/all_mirna_counts.tab 363ecee318c57ee7e2e45ca468007baa results/TABLES/all_pri-mir_counts.tab -0d76977b2e36046cc176112776c5fa4e results/test_lib/alignments_intersecting_mirna_uncollapsed_sorted.bam.bai -25aca3f96e7ed644067d2050393bf7a4 results/test_lib/alignments_intersecting_mirna_uncollapsed_sorted.bam +a0d36054628fbe1975e142d638fbb0ff results/test_lib/alignments_intersecting_mirna_uncollapsed_sorted.bam.bai +1371823fdc7a218e1e92ad546cfac585 results/test_lib/alignments_intersecting_mirna_uncollapsed_sorted.bam cc01c7884838a597c587437cb0acf64e results/test_lib/alignments_intersecting_mirna.sam b1eb81426f890d671bba8c8a815edc1e results/test_lib/alignments_intersecting_primir.sam eec9be6cda61d2728290c92c1209f455 results/intermediates/TABLES/mirna_counts_test_lib @@ -22,12 +22,12 @@ d41d8cd98f00b204e9800998ecf8427e results/intermediates/test_lib/oligomap_transc 76643f87bb2e2bff77d1b1223d7720b5 results/intermediates/test_lib/segemehl_genome_mappings.sam d41d8cd98f00b204e9800998ecf8427e results/intermediates/test_lib/transcriptome_mappings_to_genome.sam 63a32839360a985b68e0685aafad5c54 results/intermediates/test_lib/fa/reads.fa -e9e9698d9350b64b64c1f6d96019fce8 results/intermediates/test_lib/alignments_intersecting_mirna_uncollapsed.sam +0a0d69662aa8190abdcd8f395802ab00 results/intermediates/test_lib/alignments_intersecting_mirna_uncollapsed.sam edcb854702519c0002d8ce89a21e54ef results/intermediates/test_lib/reads_formatted.fasta 1a547487b8e92ad85bb26ff9b1db1f93 results/intermediates/test_lib/intersected_extended_mirna.bed a287ffc43b6afbdde3e9905bc27c28a5 results/intermediates/test_lib/alignments_all_sorted_test_lib.bam ec0e9bcc8ea857da897035c8fca4078f results/intermediates/test_lib/reads_trimmed_adapters.fasta -d7a5ab720ff9c96f41f3755a05b8f9e0 results/intermediates/test_lib/alignments_intersecting_mirna_uncollapsed.bam +37c74848cbbba56e29e240cc078f5249 results/intermediates/test_lib/alignments_intersecting_mirna_uncollapsed.bam 1f1b873d05ec14ef9b16376a1c98315b results/intermediates/test_lib/genome_mappings.sam f5cb65466d328036a15b66cfbd4d8419 results/intermediates/test_lib/oligomap_genome_report.txt 6cbdb9299e09b3e39b79a50db69226b5 results/intermediates/test_lib/transcriptome_mappings_no_header.sam