Skip to content

Commit

Permalink
bump v1.3.9
Browse files Browse the repository at this point in the history
  • Loading branch information
cytham committed Mar 24, 2021
1 parent 5fc7e4a commit 45b1961
Show file tree
Hide file tree
Showing 15 changed files with 1,978 additions and 1,781 deletions.
19 changes: 19 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ NanoVar Changelog

Release Summary:


Version 1.3.9 - Mar 24, 2021
* Fixed nv_detect_algo insertion and deletion large size bug
* Added pysam >=0.15.4 into bioconda meta.yaml as prerequisite
* Added pybedtools >=0.8.2 prerequisite to fix the RuntimeWarning buffering=1 error (Refer to https://github.com/daler/pybedtools/issues/322)
* Prevent repeated read-indexes by adjusting seed (Thanks to Geoffrey Woodland)
* Improve read cluster exception message (Thanks to Geoffrey Woodland)
* Unique ID of breakpoints identified by BLAST shortened to four characters to prevent mixing with minimap2 breakpoints
* Adjusted breakend filtering during mm clustering
* Improved breakpoint clustering algorithm to increase accuracy
* Added newline to last line of genome.sizes file
* Added genome check for BAM (Thanks to oneillkza, https://github.com/cytham/nanovar/issues/19#issuecomment-791599629)
* Modified argparse "usage" format
* Suppressed BAM index missing warning
* Suppressed TensorFlow INFO and WARNING logs
* Migrated to tensorflow-cpu/tensorflow-mkl to prevent cuda_driver.cc error
* Fixed FixedLocator warning


Version 1.3.8 - May 24, 2020
* Fixed file type detection (Thanks to jiadong324, https://github.com/cytham/nanovar/issues/9#issuecomment-626579853)
* Fixed negative coordinates in VCF
Expand Down
17 changes: 16 additions & 1 deletion nanovar/nanovar
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def main():
filename = os.path.basename(file_path)
read_suffix = ['.fa', '.fq', '.fasta', '.fastq', '.fa.gzip', '.fq.gzip', '.fa.gz', '.fq.gz', '.fasta.gz', '.fastq.gz']
bam_suffix = '.bam'
contig_list = []
if any(filename.lower().endswith(s) for s in read_suffix):
input_name = os.path.basename(file_path).rsplit('.f', 1)[0]
input_type = 'raw'
Expand All @@ -153,12 +154,18 @@ def main():
else:
logging.debug("Input FASTQ/FASTA file passed")
elif filename.lower().endswith(bam_suffix):
save = pysam.set_verbosity(0) # Suppress BAM index missing warning
sam = pysam.AlignmentFile(file_path, "rb")
pysam.set_verbosity(save) # Revert verbosity level
try:
assert sam.is_bam, "Error: Input BAM file is not a BAM file."
input_name = os.path.basename(file_path).rsplit('.bam', 1)[0]
input_type = 'bam'
fastx_check = []
# Get BAM contigs from header
header = sam.header.to_dict()
for h in header['SQ']:
contig_list.append(h['SN'])
except AssertionError:
logging.critical("Error: Input BAM file is not a BAM file.")
raise Exception("Error: Input BAM file is not a BAM file.")
Expand Down Expand Up @@ -200,6 +207,13 @@ def main():
contig_len_dict[seq_record.id] = len(seq_record)
total_gsize += len(seq_record)

# Check BAM contigs in reference genome
if input_type == 'bam':
for c in contig_list:
if c not in contig_len_dict:
logging.critical("Error: Contig %s in BAM is absent in reference genome" % c)
raise Exception("Error: Contig %s in BAM is absent in reference genome" % c)

# Check contig id for invalid symbols
contig_omit = checkcontignames(contig_len_dict)

Expand Down Expand Up @@ -327,13 +341,14 @@ def main():
make_index(force, ref_path, wk_dir, ref_name, mdb, wmk, hsb)

# Run hsblastn on INS and INV reads
hsba = align_hsb(ref_path, wk_dir, ref_name, threads_bt, hsb)
hsba = align_hsb(ref_path, wk_dir, ref_name, threads_bt, hsb, debug)
sub_run = VariantDetect(wk_dir, hsba[1], splitpct, minalign, filter_path, minlen, buff, model_path,
total_gsize, contig_len_dict, score_threshold, file_path, input_name, ref_path, ref_name, hsba[0],
mincov, homo_t, het_t, debug, contig_omit)

# Parsing INS and INV SVs and clustering
sub_run.rlendict = run.rlendict
sub_run.seed = run.seed # Try to prevent repeated read-indexes
sub_run.parse_detect_hsb()
logging.info('Parsing BAM and detecting INV and INS SVs')
run.cluster_nn(add_out=sub_run.total_out)
Expand Down
5 changes: 3 additions & 2 deletions nanovar/nv_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def align_mm(ref, read, wk_dir, read_name, ref_name, threads, mm, data_type, st)


# HS-BLASTN alignment
def align_hsb(ref, wk_dir, ref_name, threads, hsb):
def align_hsb(ref, wk_dir, ref_name, threads, hsb, debug):
obinary_path = os.path.join(wk_dir, str(ref_name) + '.counts.obinary')
out_path = os.path.join(wk_dir, 'temp-%s-blast.tab' % ref_name)
read_path = os.path.join(wk_dir, 'temp2.fa')
Expand All @@ -187,7 +187,8 @@ def align_hsb(ref, wk_dir, ref_name, threads, hsb):
if exitcode != 0:
logging.critical("Error: hs-blastn alignment failed")
raise Exception("Error: hs-blastn alignment failed, see log")
os.remove(read_path)
if not debug: # Remove temp2.fa
os.remove(read_path)
return ["hs-blastn align -db " + ref + " -window_masker_db obinary_path -query " + read_path + " -out " + out_path +
" -outfmt 6 -num_threads " + str(threads) + " -max_target_seqs 3 -gapopen 0 -gapextend 4 -penalty -3 -reward 2",
out_path]
Loading

0 comments on commit 45b1961

Please sign in to comment.