Skip to content

Commit

Permalink
r310: added samsort - lite internal BAM sort
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Jul 23, 2015
1 parent fa212e7 commit 7db14a0
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 2 deletions.
19 changes: 18 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ CFLAGS= -g -Wall -Wc++-compat -Wno-unused-function -O2
CPPFLAGS= -Ihtslib
OBJS= main.o samview.o vcfview.o bamidx.o bcfidx.o bamshuf.o bam2fq.o tabix.o \
abreak.o bam2bed.o razf.o razip.o faidx.o bedidx.o pileup.o mapchk.o depth.o genreg.o \
kthread.o qualbin.o
kthread.o qualbin.o samsort.o
PROG= htsbox

.SUFFIXES:.c .o
Expand All @@ -20,8 +20,25 @@ lib:
qualbin.o:qualbin.c
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DBGZF_MT $< -o $@

samsort.o:samsort.c
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DBGZF_MT $< -o $@

htsbox:lib $(OBJS)
$(CC) $(CFLAGS) -o $@ $(OBJS) -Lhtslib -lhts -lpthread -lz -lm

clean:
rm -fr gmon.out *.o a.out *.dSYM *~ $(PROG); cd htslib; $(MAKE) clean; cd ..

depend:
(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c)

# DO NOT DELETE THIS LINE -- make depend depends on it.

abreak.o: boxver.h faidx.h
faidx.o: faidx.h razf.h
main.o: boxver.h
mapchk.alt.o: faidx.h
mapchk.o: faidx.h
pileup.o: faidx.h boxver.h
razf.o: razf.h
razip.o: razf.h
2 changes: 1 addition & 1 deletion boxver.h
Original file line number Diff line number Diff line change
@@ -1 +1 @@
#define HTSBOX_VERSION "r309"
#define HTSBOX_VERSION "r310"
13 changes: 13 additions & 0 deletions htslib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,16 @@ tbx.o:tbx.h bgzf.h kstring.h hts.h

clean:
rm -fr gmon.out *.o a.out *.dSYM *~ *.a *.so *.dylib

depend:
(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c)

# DO NOT DELETE THIS LINE -- make depend depends on it.

bgzf.o: bgzf.h knetfile.h
hts.o: bgzf.h hts.h kseq.h khash.h knetfile.h ksort.h
knetfile.o: knetfile.h
kurl.o: kurl.h
sam.o: sam.h bgzf.h hts.h khash.h kseq.h kstring.h
tbx.o: tbx.h hts.h bgzf.h khash.h
vcf.o: kstring.h bgzf.h vcf.h hts.h khash.h kseq.h
3 changes: 3 additions & 0 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ int main_mapchk(int argc, char *argv[]);
int main_depth(int argc, char *argv[]);
int main_genreg(int argc, char *argv[]);
int main_qualbin(int argc, char *argv[]);
int main_samsort(int argc, char *argv[]);

static int usage()
{
Expand All @@ -33,6 +34,7 @@ static int usage()
fprintf(stderr, " bcfidx index BCF\n");
fprintf(stderr, " faidx index FASTA\n");
fprintf(stderr, " razip indexed compression\n\n");
fprintf(stderr, " samsort sort SAM/BAM in memory\n");
fprintf(stderr, " bamshuf shuffle BAM and group alignments by query name\n");
fprintf(stderr, " bam2fq convert name grouped BAM to interleaved fastq\n");
fprintf(stderr, " bam2bed BAM->BED conversion\n");
Expand All @@ -55,6 +57,7 @@ int main(int argc, char *argv[])
else if (strcmp(argv[1], "bamshuf") == 0) return main_bamshuf(argc-1, argv+1);
else if (strcmp(argv[1], "bam2fq") == 0) return main_bam2fq(argc-1, argv+1);
else if (strcmp(argv[1], "bam2bed") == 0) return main_bam2bed(argc-1, argv+1);
else if (strcmp(argv[1], "samsort") == 0) return main_samsort(argc-1, argv+1);
else if (strcmp(argv[1], "tabix") == 0) return main_tabix(argc-1, argv+1);
else if (strcmp(argv[1], "abreak") == 0) return main_abreak(argc-1, argv+1);
else if (strcmp(argv[1], "pileup") == 0) return main_pileup(argc-1, argv+1);
Expand Down
73 changes: 73 additions & 0 deletions samsort.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include "sam.h"
#include "ksort.h"

/* One alignment held in memory together with the values it is ordered by. */
typedef struct {
	uint64_t pos; /* primary sort key */
	uint64_t idx; /* secondary sort key, used when two records share the same pos */
	bam1_t *b;    /* the alignment record itself */
} aln_t;

/* Strict "less than" on aln_t: order by pos first; equal pos falls back to idx. */
#define aln_lt(a, b) ((a).pos != (b).pos? (a).pos < (b).pos : (a).idx < (b).idx)
/* Instantiate the ksort.h routines for aln_t under the name suffix "aln". */
KSORT_INIT(aln, aln_t, aln_lt)

/*
 * Entry point of the "samsort" sub-command.
 *
 * Reads every alignment of the input file into memory, sorts the records by
 * the 64-bit key built from core.tid and core.pos (ties are broken by input
 * order) and writes the header plus the sorted records as BAM to stdout.
 *
 * Options:
 *   -S        open the input without the "b" mode flag (SAM input)
 *   -I        set ignore_sam_err on the header before reading records
 *   -l INT    compression level appended to the output mode when in [0,9]
 *   -t INT    number of threads passed to bgzf_mt() [2]
 *
 * Returns 0 on success and 1 on a usage error or on any open, read, write or
 * allocation failure. All resources are released on every path.
 */
int main_samsort(int argc, char *argv[])
{
	samFile *in = 0;
	int c, clevel = -1, sam_in = 0, n_threads = 2, ignore_sam_err = 0;
	int ret, rc = 1; /* rc stays 1 until everything has been written successfully */
	char moder[8], modew[8];
	bam_hdr_t *h = 0;
	uint64_t k, n_aln = 0, m_aln = 0;
	bam1_t *b = 0;
	aln_t *aln = 0;
	BGZF *fp = 0;

	while ((c = getopt(argc, argv, "SIl:t:")) >= 0) {
		if (c == 'S') sam_in = 1;
		else if (c == 'I') ignore_sam_err = 1;
		else if (c == 'l') clevel = atoi(optarg);
		else if (c == 't') n_threads = atoi(optarg);
	}
	if (argc == optind) {
		fprintf(stderr, "Usage: samsort [-S] [-l clevel] [-t nThreads] <in.bam>|<in.sam>\n");
		return 1;
	}

	/* "r" for SAM, "rb" for BAM */
	strcpy(moder, "r");
	if (!sam_in) strcat(moder, "b");
	in = sam_open(argv[optind], moder, 0);
	if (in == 0) {
		fprintf(stderr, "[E::%s] failed to open '%s' for reading\n", __func__, argv[optind]);
		goto end;
	}

	h = sam_hdr_read(in);
	if (h == 0) {
		fprintf(stderr, "[E::%s] failed to read the header of '%s'\n", __func__, argv[optind]);
		goto end;
	}
	h->ignore_sam_err = ignore_sam_err;
	b = bam_init1();
	if (b == 0) {
		fprintf(stderr, "[E::%s] out of memory\n", __func__);
		goto end;
	}
	while ((ret = sam_read1(in, h, b)) >= 0) {
		aln_t *a;
		if (n_aln == m_aln) {
			/* grow geometrically; keep the old array valid if realloc fails */
			uint64_t m_new = m_aln? m_aln<<1 : 64;
			aln_t *tmp = (aln_t*)realloc(aln, m_new * sizeof(aln_t));
			if (tmp == 0) {
				fprintf(stderr, "[E::%s] out of memory\n", __func__);
				goto end;
			}
			aln = tmp;
			m_aln = m_new;
		}
		a = &aln[n_aln];
		a->idx = n_aln; /* input order, the tie-breaker in aln_lt() */
		a->pos = (uint64_t)b->core.tid << 32 | b->core.pos;
		a->b = bam_init1();
		if (a->b == 0) {
			fprintf(stderr, "[E::%s] out of memory\n", __func__);
			goto end;
		}
		++n_aln; /* counted only once a->b is valid, so cleanup never sees a stale slot */
		bam_copy1(a->b, b);
	}
	if (ret < -1) { /* -1 is the normal end of file; anything lower is a read/parse failure */
		fprintf(stderr, "[E::%s] error while reading '%s'; input truncated or corrupted?\n", __func__, argv[optind]);
		goto end;
	}
	bam_destroy1(b); b = 0;
	sam_close(in); in = 0;

	ks_introsort(aln, n_aln, aln);

	/* "w" optionally followed by one digit for the compression level */
	strcpy(modew, "w");
	if (clevel >= 0 && clevel <= 9) sprintf(modew+1, "%d", clevel);
	fp = bgzf_dopen(fileno(stdout), modew);
	if (fp == 0) {
		fprintf(stderr, "[E::%s] failed to open stdout for writing\n", __func__);
		goto end;
	}
	bgzf_mt(fp, n_threads, 256);

	if (bam_hdr_write(fp, h) < 0) {
		fprintf(stderr, "[E::%s] failed to write the header\n", __func__);
		goto end;
	}
	for (k = 0; k < n_aln; ++k) {
		if (bam_write1(fp, aln[k].b) < 0) {
			fprintf(stderr, "[E::%s] failed to write an alignment\n", __func__);
			goto end;
		}
		/* release each record as soon as it is written to keep peak memory down */
		bam_destroy1(aln[k].b);
		aln[k].b = 0;
	}
	ret = bgzf_close(fp);
	fp = 0;
	if (ret < 0) {
		fprintf(stderr, "[E::%s] failed to flush and close the output\n", __func__);
		goto end;
	}
	rc = 0;

end:
	if (fp) bgzf_close(fp);
	for (k = 0; k < n_aln; ++k)
		if (aln[k].b) bam_destroy1(aln[k].b);
	free(aln);
	if (b) bam_destroy1(b);
	if (h) bam_hdr_destroy(h);
	if (in) sam_close(in);
	return rc;
}

0 comments on commit 7db14a0

Please sign in to comment.