Skip to content

Commit

Permalink
Merge pull request #33 from leqi0001/master
Browse files Browse the repository at this point in the history
Added a BDRhapsody_specific.py and forced BarcodeHandler to read barc…
  • Loading branch information
arogozhnikov authored Aug 17, 2024
2 parents 7b6c252 + 5664aac commit 61d7926
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
36 changes: 36 additions & 0 deletions demuxalot/BDRhapsody_specific.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
This file defines callbacks that are optimized for BD Rhapsody WTA (RNA) assays
and can overcome some of the issues in BD Rhapsody output.
If some other aligner is used, you can use simpler callbacks (e.g. using only mapq).
"""
from pysam import AlignedRead
from typing import Optional, Tuple

from demuxalot.utils import hash_string


def parse_read(read: AlignedRead, umi_tag="MA", nhits_tag="NH", score_tag="AS",
               score_diff_max=8, mapq_threshold=20,
               # score_diff_max=8 corresponds to max. 2 edits
               p_misaligned_default=0.01) -> Optional[Tuple[float, int]]:
    """
    Decide whether a read is usable and, if so, return its misalignment prior and hashed UMI.

    :param read: alignment record to inspect
    :param umi_tag: name of the tag holding the molecule barcode (UMI)
    :param nhits_tag: name of the tag holding the number of reported alignments
    :param score_tag: name of the tag holding the alignment score
    :param score_diff_max: read is dropped when its score is less than or equal to
        (read length - score_diff_max)
    :param mapq_threshold: read is dropped when its mapping quality is below this value
    :param p_misaligned_default: misalignment probability reported for every accepted read
    :return: None if read should be ignored, otherwise a tuple
        (p_misaligned_default, hash_string(<value of umi_tag>)).
        Read still can be ignored later if it is not in the barcode list.

    Reads that lack any of the three tags, or that carry no sequence, are ignored
    (None is returned) instead of raising KeyError / TypeError.
    """
    # Without all three tags the read cannot be scored; get_tag would raise on a missing tag.
    required_tags = (umi_tag, nhits_tag, score_tag)
    if not all(read.has_tag(tag) for tag in required_tags):
        return None

    sequence = read.seq
    if sequence is None:
        # no stored sequence, so the score cannot be compared against the read length
        return None

    if read.get_tag(score_tag) <= len(sequence) - score_diff_max:
        # too many edits
        return None
    if read.get_tag(nhits_tag) > 1:
        # multi-mapped
        return None
    if read.mapq < mapq_threshold:
        # this one should not be triggered because of NH, but just in case
        return None

    umi_hash = hash_string(read.get_tag(umi_tag))
    return p_misaligned_default, umi_hash
2 changes: 1 addition & 1 deletion demuxalot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def from_file(barcodes_filename, **kwargs):
:param barcodes_filename: path to barcodes.csv or barcodes.csv.gz where each line is a barcode
:param **kwargs: optional additional keyword arguments to pass down to BarcodeHandler.__init__
"""
barcodes = pd.read_csv(barcodes_filename, header=None)[0].values
barcodes = pd.read_csv(barcodes_filename, header=None)[0].values.astype("str")
return BarcodeHandler(barcodes, **kwargs)

def filter_to_rg_value(self, rg_value):
Expand Down

0 comments on commit 61d7926

Please sign in to comment.