From aa83ccfc7983ede9a37b66d28f46138eb813b61d Mon Sep 17 00:00:00 2001 From: mklarqvist Date: Thu, 5 Jul 2018 11:08:29 +0100 Subject: [PATCH 01/19] updated build process --- .travis.yml | 5 ++-- README.md | 32 +++++++++++++++++++++--- makefile | 71 +++++++++++++++++++++++++++++++++++++---------------- 3 files changed, 81 insertions(+), 27 deletions(-) diff --git a/.travis.yml b/.travis.yml index f023a75..a43d667 100644 --- a/.travis.yml +++ b/.travis.yml @@ -139,7 +139,8 @@ before_install: - make -j4 - sudo make install - cd .. +script: script: - git submodule update --recursive - - make -j4 - - make examples + - make -j 4 + \ No newline at end of file diff --git a/README.md b/README.md index 310c636..f0a0864 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ # Exploring population-scale sequence variant data -Tachyon, or `YON` for short, is an open source software library for storing and rapidly querying sequence variant data in an (optionally) lossless and bit-exact representation. It was developed with a focus on enabling fast experimentation and storage of population-scaled datasets. We have benchmarked Tachyon on population-scaled datasets up to 10 million whole-genome sequenced individuals (see [benchmarks](BENCHMARKS.md)). Most genotype-specific algorithms were originally developed for [Tomahawk][tomahawk] for the purpose of calculating all-vs-all linkage-disequilibrium and identity-by-state in large-scale cohorts. +Tachyon, or `YON` for short, is an open source software library for storing and rapidly querying sequence variant data in an (optionally) lossless and bit-exact representation. It is completely compatible with BCF/VCF. It was developed with a focus on enabling fast experimentation and storage of population-scaled datasets. We have benchmarked Tachyon on population-scaled datasets up to 10 million whole-genome sequenced individuals (see [benchmarks](BENCHMARKS.md)). Tachyon grew out of the [Tomahawk][tomahawk] project for calculating genome-wide linkage-disequilibrium. ## Highlights of Tachyon * **Self-indexing**: Tachyon always builds the best possible quad-tree, linear, and meta-index given the input data (irrespective of sorting). There are no external indices as data are stored in the file itself. @@ -56,25 +56,49 @@ You will need to have installed the following dependencies: * [openssl][openssl]: An open-source library for encryption/decryption ### Building from source -If the required dependencies listed above are installed then building is trivial. Note the added `--recursive` flag to the clone request. This flag is required to additionally clone the latest third-party dependencies. +If the required external dependencies listed above are installed then building is trivial. Note the added `--recursive` flag to the clone request. This flag is required to additionally pull down the latest third-party dependencies. ```bash git clone --recursive https://github.com/mklarqvist/tachyon cd tachyon make ``` -Tachyon comes bundled with several API-examples in the `lib_example` directory. Build them with +Tachyon comes bundled with several API-examples in the `lib_example` directory. They are built by default but should you want to rebuild them execute the command: ```bash make examples ``` +### Building without admin privilidges If you have no super-user powers required to install software on your machine: -``` + +### Linux/MacOSX +```bash git clone --recursive https://github.com/mklarqvist/tachyon cd tachyon +# If you do NOT have ZSTD available git clone https://github.com/facebook/zstd cd zstd make cd .. +# If you do NOT have OpenSSL installed +git clone git://git.openssl.org/openssl.git +cd openssl +./config +make +cd .. +# Build Tachyon +make +``` +### MacOSX +Installation using [Homebrew](https://brew.sh/): +```bash +brew update +# If you do NOT have OpenSSL installed +brew install openssl +# If you do NOT have ZSTD installed +brew install zstd +# Install Tachyon +git clone --recursive https://github.com/mklarqvist/tachyon +cd tachyon make ``` diff --git a/makefile b/makefile index a6d1d82..8d27968 100644 --- a/makefile +++ b/makefile @@ -47,8 +47,33 @@ DEBUG_FLAGS := endif # Global build parameters -INCLUDE_PATH := -I"lib/" -I"zstd/lib/" -I"zstd/lib/common/" -I"/usr/local/opt/openssl/lib/" -I"/usr/include/openssl/" -I"/usr/local/include/" -ZSTD_LIBRARY_PATH := -L"zstd/lib" +INCLUDE_PATH = -I./lib/ +ZSTD_LIBRARY_PATH = + +# Check if ZSTD is in the current directory +ifneq ("$(wildcard ./zstd/)","") + INCLUDE_PATH += -I./zstd/lib/ -I./zstd/lib/common/ + ZSTD_LIBRARY_PATH = -L./zstd/lib +else ifneq ("$(wildcard /usr/local/include/)","") + INCLUDE_PATH += -I/usr/local/include/ + #ZSTD_LIBRARY_PATH = -L/usr/local/lib +endif + +# Try to deduce where OpenSSL is located +OPENSSL_LIBRARY_PATH = +ifneq ("$(wildcard ./openssl/)","") + INCLUDE_PATH += -I./openssl/include/openssl/ + OPENSSL_LIBRARY_PATH = -L./openssl/ +else ifneq ("$(wildcard /usr/local/include/openssl/)","") + INCLUDE_PATH += -I/usr/local/include/openssl/ + #OPENSSL_LIBRARY_PATH = -L/usr/local/lib/ +else ifneq ("$(wildcard /usr/include/openssl/evp.h)","") + INCLUDE_PATH += -I/usr/include/openssl/ + OPENSSL_LIBRARY_PATH = -L/usr/lib/x86_64-linux-gnu/ +endif + +LIBRARY_PATHS := $(ZSTD_LIBRARY_PATH) $(OPENSSL_LIBRARY_PATH) -L/usr/local/lib/ + OPTFLAGS := -O3 -msse4.2 # Legacy flags used #OPTFLAGS := -O3 -march=native -mtune=native -ftree-vectorize -pipe -frename-registers -funroll-loops @@ -60,15 +85,16 @@ endif # OS X linker doesn't support -soname, and use different extension # see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html ifneq ($(shell uname), Darwin) -SHARED_EXT = so -LD_LIB_FLAGS := -shared -Wl,-rpath,"zstd/lib",-soname,libtachyon.$(SHARED_EXT) +SHARED_EXT = so +LD_LIB_FLAGS = -shared '-Wl,-rpath,$$ORIGIN/zstd/lib,-rpath,$$ORIGIN/openssl/,-soname,libtachyon.$(SHARED_EXT)' else -SHARED_EXT = dylib -LD_LIB_FLAGS := -dynamiclib -install_name libtachyon.$(SHARED_EXT) +SHARED_EXT = dylib +LD_LIB_FLAGS = -dynamiclib -install_name libtachyon.$(SHARED_EXT) '-Wl,-rpath,$$ORIGIN/zstd/lib,-rpath,$$ORIGIN/openssl/' endif -CXXFLAGS := -std=c++0x $(OPTFLAGS) $(DEBUG_FLAGS) -CFLAGS := -std=c99 $(OPTFLAGS) $(DEBUG_FLAGS) +CXXFLAGS = -std=c++0x $(OPTFLAGS) $(DEBUG_FLAGS) +CFLAGS = -std=c99 $(OPTFLAGS) $(DEBUG_FLAGS) +BINARY_RPATHS = '-Wl,-rpath,$$ORIGIN/zstd/lib,-rpath,$$ORIGIN/openssl/' LIBS := -lzstd -lcrypto CXX_SOURCE = $(wildcard lib/algorithm/compression/*.cpp) \ @@ -109,41 +135,44 @@ lib/third_party/zlib/zutil.c OBJECTS = $(CXX_SOURCE:.cpp=.o) $(C_SOURCE:.c=.o) CPP_DEPS = $(CXX_SOURCE:.cpp=.d) $(C_SOURCE:.c=.d) -LIB_INCLUDE_PATH = -I"lib/" -LIB_EXAMPLE_FLAGS = -L"$(PWD)" -ltachyon '-Wl,-rpath,$$ORIGIN/../,-rpath,"$(PWD)"' +LIB_INCLUDE_PATH = -I./lib/ +LIB_EXAMPLE_FLAGS = -L./ -ltachyon '-Wl,-rpath,$$ORIGIN/../,-rpath,$(PWD)' LIB_EXAMPLE_SOURCE = $(wildcard lib_example/*.cpp) LIB_EXAMPLE_OUTPUT = $(LIB_EXAMPLE_SOURCE:.cpp=) # Inject git information -BRANCH := $(shell git rev-parse --abbrev-ref HEAD) +BRANCH = $(shell git rev-parse --abbrev-ref HEAD) ifneq ($(BRANCH), master) -GIT_VERSION := $(shell git describe --abbrev=8 --dirty --always --tags)-$(BRANCH) +GIT_VERSION = $(shell git describe --abbrev=8 --dirty --always --tags)-$(BRANCH) else -GIT_VERSION := $(shell git describe --abbrev=8 --dirty --always --tags) +GIT_VERSION = $(shell git describe --abbrev=8 --dirty --always --tags) endif -# All Target +# Default target all: tachyon # Third party rules lib/third_party/xxhash/%.o: lib/third_party/xxhash/%.c - gcc $(CFLAGS) -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" -o "$@" "$<" + gcc $(CFLAGS) -c -o $@ $< lib/third_party/zlib/%.o: lib/third_party/zlib/%.c - gcc $(CFLAGS) -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" -o "$@" "$<" + gcc $(CFLAGS) -c -o $@ $< # Generic rules %.o: %.cpp - g++ $(CXXFLAGS) $(INCLUDE_PATH) -c -fmessage-length=0 -DVERSION=\"$(GIT_VERSION)\" -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" -o "$@" "$<" + g++ $(CXXFLAGS) $(INCLUDE_PATH) -c -DVERSION=\"$(GIT_VERSION)\" -o $@ $< tachyon: $(OBJECTS) - g++ '-Wl,-rpath,$$ORIGIN/zstd/lib/,-rpath,"$(PWD)/zstd/lib/"' $(ZSTD_LIBRARY_PATH) -pthread -o "tachyon" $(OBJECTS) $(LIBS) + g++ $(BINARY_RPATHS) $(LIBRARY_PATHS) -pthread $(OBJECTS) $(LIBS) -o tachyon $(MAKE) cleanmost $(MAKE) library library=true + $(MAKE) examples library: $(OBJECTS) - @echo 'Building with positional independence...' - g++ $(LD_LIB_FLAGS) $(ZSTD_LIBRARY_PATH) -pthread -o libtachyon.$(SHARED_EXT).$(LIBVER) $(OBJECTS) $(LIBS) + @echo 'Building dynamic library...' + g++ $(LD_LIB_FLAGS) $(LIBRARY_PATHS) -pthread $(OBJECTS) $(LIBS) -o libtachyon.$(SHARED_EXT).$(LIBVER) + @echo 'Building static library...' + ar crs libtachyon.a $(OBJECTS) @echo 'Symlinking library...' ln -sf libtachyon.$(SHARED_EXT).$(LIBVER) libtachyon.$(SHARED_EXT) ln -sf libtachyon.$(SHARED_EXT).$(LIBVER) ltachyon.$(SHARED_EXT) @@ -151,7 +180,7 @@ library: $(OBJECTS) examples: $(LIB_EXAMPLE_OUTPUT) lib_example/%: lib_example/%.cpp - g++ $(CXXFLAGS) $(INCLUDE_PATH) $(LIB_INCLUDE_PATH) -fmessage-length=0 -DVERSION=\"$(GIT_VERSION)\" -o "$@" "$<" $(LIB_EXAMPLE_FLAGS) + g++ $(CXXFLAGS) $(INCLUDE_PATH) $(LIB_INCLUDE_PATH) $(LIB_EXAMPLE_FLAGS) -DVERSION=\"$(GIT_VERSION)\" -o $@ $< # Clean procedures clean_examples: From 99feeffa63f46473ede89ba8f798bd8746d65c65 Mon Sep 17 00:00:00 2001 From: mklarqvist Date: Thu, 5 Jul 2018 11:27:09 +0100 Subject: [PATCH 02/19] rpath change --- makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/makefile b/makefile index 8d27968..4a6806c 100644 --- a/makefile +++ b/makefile @@ -86,10 +86,10 @@ endif # see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html ifneq ($(shell uname), Darwin) SHARED_EXT = so -LD_LIB_FLAGS = -shared '-Wl,-rpath,$$ORIGIN/zstd/lib,-rpath,$$ORIGIN/openssl/,-soname,libtachyon.$(SHARED_EXT)' +LD_LIB_FLAGS = -shared -Wl,-rpath,./zstd/lib,-rpath,./openssl/,-soname,libtachyon.$(SHARED_EXT) else SHARED_EXT = dylib -LD_LIB_FLAGS = -dynamiclib -install_name libtachyon.$(SHARED_EXT) '-Wl,-rpath,$$ORIGIN/zstd/lib,-rpath,$$ORIGIN/openssl/' +LD_LIB_FLAGS = -dynamiclib -install_name libtachyon.$(SHARED_EXT) -Wl,-rpath,./zstd/lib,-rpath,./openssl/ endif CXXFLAGS = -std=c++0x $(OPTFLAGS) $(DEBUG_FLAGS) From 590163d0a9d6bb20d69b1e63e226327044ecd697 Mon Sep 17 00:00:00 2001 From: mklarqvist Date: Thu, 5 Jul 2018 11:50:36 +0100 Subject: [PATCH 03/19] makefile rpath for local install and examples --- README.md | 2 +- makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f0a0864..961a123 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ make examples ``` ### Building without admin privilidges -If you have no super-user powers required to install software on your machine: +If you have no super-user (`sudo`) powers required to install software on your machine: ### Linux/MacOSX ```bash diff --git a/makefile b/makefile index 4a6806c..d6a086a 100644 --- a/makefile +++ b/makefile @@ -136,7 +136,7 @@ OBJECTS = $(CXX_SOURCE:.cpp=.o) $(C_SOURCE:.c=.o) CPP_DEPS = $(CXX_SOURCE:.cpp=.d) $(C_SOURCE:.c=.d) LIB_INCLUDE_PATH = -I./lib/ -LIB_EXAMPLE_FLAGS = -L./ -ltachyon '-Wl,-rpath,$$ORIGIN/../,-rpath,$(PWD)' +LIB_EXAMPLE_FLAGS = -L./ -ltachyon '-Wl,-rpath,$$ORIGIN/../,-rpath,$(PWD),-rpath,$$ORIGIN/../zstd/lib,-rpath,$$ORIGIN/../openssl' LIB_EXAMPLE_SOURCE = $(wildcard lib_example/*.cpp) LIB_EXAMPLE_OUTPUT = $(LIB_EXAMPLE_SOURCE:.cpp=) From dfab0dc7ee5b01b5e1a268004f939af593f30439 Mon Sep 17 00:00:00 2001 From: mklarqvist Date: Sun, 8 Jul 2018 19:09:37 +0100 Subject: [PATCH 04/19] makefile update; stub for mapper --- .cproject | 2 +- .settings/language.settings.xml | 4 +- README.md | 2 +- BENCHMARKS.md => docs/benchmarks.md | 0 .../components/variant_block_header.cpp | 1 + lib/containers/variant_block.h | 18 -- lib/containers/variant_block_container.h | 193 ++++++++++++++++++ makefile | 6 +- 8 files changed, 201 insertions(+), 25 deletions(-) rename BENCHMARKS.md => docs/benchmarks.md (100%) create mode 100644 lib/containers/variant_block_container.h diff --git a/.cproject b/.cproject index 2ee6807..174df18 100644 --- a/.cproject +++ b/.cproject @@ -20,7 +20,7 @@