Skip to content

Commit

Permalink
update
Browse files (browse the repository at this point in the history)
  • Loading branch information
KennthShang committed Oct 30, 2024
1 parent 3d6d591 commit c88a4d4
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 7 deletions.
21 changes: 20 additions & 1 deletion src/phabox2/contamination.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ def run(inputs):
with mp.Pool(threads) as pool:
kmer_freq_list = pool.map(get_average_kmer_freq, genomes.values())

low_rec = []
medium_rec = []
high_rec = []
for kmer_freq, genome in zip(kmer_freq_list, genomes.values()):
genome.kmer_freq = kmer_freq
if genome.regions:
Expand All @@ -109,13 +112,29 @@ def run(inputs):
genome.provirus = 'No'
genome.contamination = 0
if genome.count_viral == 0 and genome.count_host > 0:
genome.confident = 'Low quality'
genome.confident = 'Low quality;no viral marker found'
seq = genome.seq
record = SeqRecord(Seq(seq), id=genome.id, description="")
low_rec.append(record)
elif genome.count_viral > 0 and genome.count_host > genome.count_viral:
genome.confident = 'Medium quality'
seq = genome.seq
record = SeqRecord(Seq(seq), id=genome.id, description="")
medium_rec.append(record)
elif genome.kmer_freq < 1.25:
genome.confident = 'High quality'
seq = genome.seq
record = SeqRecord(Seq(seq), id=genome.id, description="")
high_rec.append(record)
else:
genome.confident = 'Medium quality'
seq = genome.seq
record = SeqRecord(Seq(seq), id=genome.id, description="")
medium_rec.append(record)

SeqIO.write(low_rec, f"{rootpth}/{out_dir}/contamination_supplementary/low_quality_virus.fa", "fasta")
SeqIO.write(medium_rec, f"{rootpth}/{out_dir}/contamination_supplementary/medium_quality_virus.fa", "fasta")
SeqIO.write(high_rec, f"{rootpth}/{out_dir}/contamination_supplementary/high_quality_virus.fa", "fasta")



Expand Down
13 changes: 7 additions & 6 deletions src/phabox2/phamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,20 +249,21 @@ def run(inputs):
low_confidence = {**low_confidence, **{item:1 for item in pred_csv[pred_csv['PhaMerConfidence'] == 'lower than viral score threshold;proteinal prophage, please run contamination detection task']['Accession'].values}}
low_virus_rec = []
for record in SeqIO.parse(f'{contigs}', 'fasta'):
try:
_ = virus_list[record.id]
virus_rec.append(record)
except:
pass
try:
_ = low_confidence[record.id]
low_virus_rec.append(record)
except:
continue
try:
_ = virus_list[record.id]
virus_rec.append(record)
except:
pass



SeqIO.write(virus_rec, f'{rootpth}/{out_dir}/phamer_supplementary/predicted_virus.fa', 'fasta')
SeqIO.write(low_virus_rec, f'{rootpth}/{out_dir}/phamer_supplementary/low_confident_virus.fa', 'fasta')
SeqIO.write(low_virus_rec, f'{rootpth}/{out_dir}/phamer_supplementary/uncertain_sequences_for_contamination_task.fa', 'fasta')
virus_protein_rec = []
check = {item: 1 for item in virus_list}
for record in SeqIO.parse(f'{rootpth}/{midfolder}/query_protein.fa', 'fasta'):
Expand Down

0 comments on commit c88a4d4

Please sign in to comment.