Skip to content

Commit

Permalink
Release 1.8
Browse files Browse the repository at this point in the history
  • Loading branch information
daviesrob committed Apr 3, 2018
2 parents 209f94b + 107e7d1 commit be22a2a
Show file tree
Hide file tree
Showing 67 changed files with 2,869 additions and 588 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ build_script:
- set HOME=.
- set MSYSTEM=MINGW64
- set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
- "sh -lc \"aclocal && autoheader && autoconf && ./configure && make -j2\""
- "sh -lc \"aclocal && autoheader && autoconf && ./configure CFLAGS='-Wno-format -g -O2' && make -j2\""

#build_script:
# - make
Expand Down
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*.cram -text diff=cram

# Omit these files from release tarballs.
/.appveyor.yml export-ignore
.git* export-ignore
/.travis.yml export-ignore
README.md export-ignore
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ lib*.so.*
/test/test-bcf-sr
/test/test-bcf-translate
/test/test_bgzf
/test/test_realn
/test/test-regidx
/test/test-vcf-api
/test/test-vcf-sweep
Expand Down
15 changes: 14 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@ env:
- USE_CONFIG=no
- USE_CONFIG=yes

matrix:
include:
- compiler: gcc
os: linux
env: USE_CONFIG=yes USE_LIBDEFLATE=yes
- compiler: clang
os: osx
env: USE_CONFIG=yes USE_LIBDEFLATE=yes

# For linux systems
addons:
apt:
Expand All @@ -24,5 +33,9 @@ addons:
before_install:
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$USE_CONFIG" == "no" ]]; then HOMEBREW_NO_AUTO_UPDATE=1 brew install xz || ( brew update && brew install xz ); fi

before_script:
# Build a static libdeflate under $HOME for the jobs that set USE_LIBDEFLATE=yes.
# Use the POSIX '=' string comparison with test ('==' is a shell extension).
- if test "x$USE_LIBDEFLATE" = "xyes" ; then ( cd "$HOME" && git clone --depth 1 https://github.com/ebiggers/libdeflate.git && cd libdeflate && make -j 2 CFLAGS='-fPIC -O3' libdeflate.a ); fi

script:
- if test "$USE_CONFIG" = "yes" ; then autoreconf && ./configure ; fi && make -e && make test
- if test "x$USE_LIBDEFLATE" = "xyes" ; then CONFIG_OPTS='CPPFLAGS="-I$HOME/libdeflate" LDFLAGS="-L$HOME/libdeflate" --with-libdeflate' ; else CONFIG_OPTS='--without-libdeflate' ; fi
- if test "$USE_CONFIG" = "yes" ; then autoreconf && eval ./configure $CONFIG_OPTS || { cat config.log ; false ; } ; fi && make -j 2 -e && make test
16 changes: 16 additions & 0 deletions INSTALL
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ various features and specify further optional external requirements:
by default. It can be disabled with --disable-lzma, but be aware
that not all CRAM files may be possible to decode.

--with-libdeflate
Libdeflate is a heavily optimized library for DEFLATE-based compression
and decompression. It also includes a fast crc32 implementation.
By default, ./configure will probe for libdeflate and use it if
available. To prevent this, use --without-libdeflate.

The configure script also accepts the usual options and environment variables
for tuning installation locations and compilers: type './configure --help'
for details. For example,
Expand All @@ -158,6 +164,16 @@ for details. For example,
would specify that HTSlib is to be built with icc and installed into bin,
lib, etc subdirectories under /opt/icc-compiled.

If dependencies have been installed in non-standard locations (i.e. not on
the normal include and library search paths) then the CPPFLAGS and LDFLAGS
environment variables can be used to set the options needed to find them.
For example, NetBSD users may use:

./configure CPPFLAGS=-I/usr/pkg/include \
LDFLAGS='-L/usr/pkg/lib -Wl,-R/usr/pkg/lib'

to allow compiling and linking against dependencies installed via the ports
collection.

Installation Locations
======================
Expand Down
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ BUILT_TEST_PROGRAMS = \
test/hfile \
test/sam \
test/test_bgzf \
test/test_realn \
test/test-regidx \
test/test_view \
test/test-vcf-api \
Expand Down Expand Up @@ -377,6 +378,9 @@ test/sam: test/sam.o libhts.a
test/test_bgzf: test/test_bgzf.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_bgzf.o libhts.a -lz $(LIBS) -lpthread

test/test_realn: test/test_realn.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread

test/test-regidx: test/test-regidx.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test-regidx.o libhts.a $(LIBS) -lpthread

Expand All @@ -400,6 +404,7 @@ test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h)
test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h)
test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h)
test/test_bgzf.o: test/test_bgzf.c $(htslib_bgzf_h) $(htslib_hfile_h)
test/test_realn.o: test/test_realn.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h)
test/test-regidx.o: test/test-regidx.c config.h $(htslib_regidx_h) $(hts_internal_h)
test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h)
test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h)
Expand Down Expand Up @@ -434,7 +439,7 @@ install: libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) installdirs install-$(SHLIB
if test -n "$(BUILT_PLUGINS)"; then $(INSTALL_PROGRAM) $(BUILT_PLUGINS) $(DESTDIR)$(plugindir); fi
$(INSTALL_DATA) htslib/*.h $(DESTDIR)$(includedir)/htslib
$(INSTALL_DATA) libhts.a $(DESTDIR)$(libdir)/libhts.a
$(INSTALL_MAN) htsfile.1 tabix.1 $(DESTDIR)$(man1dir)
$(INSTALL_MAN) bgzip.1 htsfile.1 tabix.1 $(DESTDIR)$(man1dir)
$(INSTALL_MAN) faidx.5 sam.5 vcf.5 $(DESTDIR)$(man5dir)

installdirs:
Expand Down
48 changes: 48 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,51 @@
Noteworthy changes in release 1.8 (3rd April 2018)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* The URL to get sequences from the EBI reference server has been changed
to https://. This is because the EBI no longer serve sequences via
plain HTTP - requests to the http:// endpoint just get redirected.
HTSlib needs to be linked against libcurl to download https:// URLs,
so CRAM users who want to get references from the EBI will need to
run configure and ensure libcurl support is enabled using the
--enable-libcurl option.

* Added libdeflate as a build option for alternative faster compression and
decompression. Results vary by CPU but compression should be twice as fast
and decompression faster.

* It is now possible to set the compression level in bgzip. (#675; thanks
to Nathan Weeks).

* bgzip now gets its own manual page.

* CRAM encoding now stores MD and NM tags verbatim where the reference
contains 'N' characters, to work around ambiguities in the SAM
specification (samtools #717/762).
Also added "store_md" and "store_nm" cram-options for forcing these
tags to be stored at all locations. This is best when combined with
a subsequent decode_md=0 option while reading CRAM.

* Multiple CRAM bug fixes, including a fix to the freeing and subsequent
reuse of references with `-T ref.fa`. (#654; reported by Chris Saunders)

* CRAM multi-threading bugs fixed: don't try to call flush on reading;
processing of multiple range queries; problems with multi-slice containers.

* Fixed crashes caused when decoding some cramtools produced CRAM files.

* Fixed a couple of minor rANS issues with handling invalid data.

* Fixed bug where probaln_glocal() tried to allocate far more memory than
needed when the query sequence was much longer than the reference. This
caused crashes in samtools and bcftools mpileup when used on data with very
long reads. (#572, problem reported by Felix Bemm via minimap2).

* sam_prop_realn() now returns -1 (the same value as for unmapped reads)
on reads that do not include at least one 'M', 'X' or '=' CIGAR operator,
and no longer adds BQ or ZQ tags. BAQ adjustments are only made to bases
covered by these operators so there is no point in trying to align
reads that do not have them. (#572)

Noteworthy changes in release 1.7 (26th January 2018)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
14 changes: 10 additions & 4 deletions bcf_sr_sort.c
Original file line number Diff line number Diff line change
Expand Up @@ -628,10 +628,16 @@ int bcf_sr_sort_next(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int mi
}
/*
 * Remove the i-th reader's entry from srt->vcf_buf.
 *
 * Frees the entry's rec array, shifts the following entries down by one
 * and zeroes the now-unused last slot.  srt->nsr itself is not modified
 * here.  The readers argument is unused.
 */
void bcf_sr_sort_remove_reader(bcf_srs_t *readers, sr_sort_t *srt, int i)
{
    //vcf_buf is allocated only in bcf_sr_sort_next
    //So, a call to bcf_sr_add_reader() followed immediately by bcf_sr_remove_reader()
    //would cause the program to crash in this segment
    if (srt->vcf_buf)
    {
        free(srt->vcf_buf[i].rec);
        // Close the gap left by entry i (regions overlap, hence memmove)
        if ( i+1 < srt->nsr )
            memmove(&srt->vcf_buf[i], &srt->vcf_buf[i+1], (srt->nsr - i - 1)*sizeof(vcf_buf_t));
        // Clear the vacated tail slot so it holds no stale rec pointer
        memset(srt->vcf_buf + srt->nsr - 1, 0, sizeof(vcf_buf_t));
    }
}
sr_sort_t *bcf_sr_sort_init(sr_sort_t *srt)
{
Expand Down
104 changes: 97 additions & 7 deletions bgzf.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
#include <sys/types.h>
#include <inttypes.h>

#ifdef HAVE_LIBDEFLATE
#include <libdeflate.h>
#endif

#include "htslib/hts.h"
#include "htslib/bgzf.h"
#include "htslib/hfile.h"
Expand Down Expand Up @@ -359,6 +363,64 @@ BGZF *bgzf_hopen(hFILE *hfp, const char *mode)
return fp;
}

#ifdef HAVE_LIBDEFLATE
/*
 * Compress a buffer into a single BGZF block using libdeflate.
 *
 * _dst   output buffer
 * dlen   in: capacity of _dst in bytes; out: size of the block written
 * src    input data
 * slen   number of input bytes; 0 produces the 28-byte EOF marker block
 * level  0 stores the data uncompressed; a negative value selects the
 *        default (6); positive values are passed to libdeflate as-is
 *
 * Returns 0 on success, -1 on failure (output buffer too small, input too
 * large for a stored block, or libdeflate failure).
 */
int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level)
{
    if (slen == 0) {
        // EOF block
        if (*dlen < 28) return -1;
        memcpy(_dst, "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0", 28);
        *dlen = 28;
        return 0;
    }

    uint8_t *dst = (uint8_t*)_dst;

    if (level == 0) {
        // Uncompressed data
        // The stored-block length fields written below are 16 bits wide,
        // so a longer input cannot be represented; fail instead of
        // silently truncating the length.
        if (slen > 0xFFFF) return -1;
        if (*dlen < slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) return -1;
        dst[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951
        u16_to_le(slen,  &dst[BLOCK_HEADER_LENGTH+1]); // length
        u16_to_le(~slen, &dst[BLOCK_HEADER_LENGTH+3]); // ones-complement length
        memcpy(dst + BLOCK_HEADER_LENGTH+5, src, slen);
        *dlen = slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;

    } else {
        // Without this check the size_t subtraction below would wrap
        // around and tell libdeflate the output buffer is enormous.
        if (*dlen <= BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) return -1;

        level = level > 0 ? level : 6; // libdeflate doesn't honour -1 as default
        // NB levels go up to 12 here.
        struct libdeflate_compressor *z = libdeflate_alloc_compressor(level);
        if (!z) return -1;

        // Raw deflate
        size_t clen =
            libdeflate_deflate_compress(z, src, slen,
                                        dst + BLOCK_HEADER_LENGTH,
                                        *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH);

        // clen is unsigned: 0 is the only failure value.
        if (clen == 0) {
            hts_log_error("Call to libdeflate_deflate_compress failed");
            libdeflate_free_compressor(z);
            return -1;
        }

        *dlen = clen + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;

        libdeflate_free_compressor(z);
    }

    // write the header
    memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block
    packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes

    // write the footer
    uint32_t crc = libdeflate_crc32(0, src, slen);
    packInt32((uint8_t*)&dst[*dlen - 8], crc);
    packInt32((uint8_t*)&dst[*dlen - 4], slen);
    return 0;
}

#else

int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level)
{
uint32_t crc;
Expand Down Expand Up @@ -395,6 +457,7 @@ int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int le
packInt32((uint8_t*)&dst[*dlen - 4], slen);
return 0;
}
#endif // HAVE_LIBDEFLATE

static int bgzf_gzip_compress(BGZF *fp, void *_dst, size_t *dlen, const void *src, size_t slen, int level)
{
Expand Down Expand Up @@ -438,6 +501,28 @@ static int deflate_block(BGZF *fp, int block_length)
return comp_size;
}

#ifdef HAVE_LIBDEFLATE

// Inflate a raw DEFLATE stream from src (slen bytes) into dst via libdeflate.
// On entry *dlen is the space available in dst; it is also passed to
// libdeflate as the location in which to report the decompressed size.
// Returns 0 on success, -1 (after logging) on any failure.
static int bgzf_uncompress(uint8_t *dst, size_t *dlen, const uint8_t *src, size_t slen) {
    struct libdeflate_decompressor *inflater = libdeflate_alloc_decompressor();
    if (inflater == NULL) {
        hts_log_error("Call to libdeflate_alloc_decompressor failed");
        return -1;
    }

    const size_t capacity = *dlen;
    int status = libdeflate_deflate_decompress(inflater, src, slen, dst, capacity, dlen);

    // The decompressor is single-use here; release it before inspecting the result.
    libdeflate_free_decompressor(inflater);

    if (status == LIBDEFLATE_SUCCESS)
        return 0;

    hts_log_error("Inflate operation failed: %d", status);
    return -1;
}

#else

static int bgzf_uncompress(uint8_t *dst, size_t *dlen, const uint8_t *src, size_t slen) {
z_stream zs;
zs.zalloc = NULL;
Expand Down Expand Up @@ -467,6 +552,7 @@ static int bgzf_uncompress(uint8_t *dst, size_t *dlen, const uint8_t *src, size_
*dlen = *dlen - zs.avail_out;
return 0;
}
#endif // HAVE_LIBDEFLATE

// Inflate the block in fp->compressed_block into fp->uncompressed_block
static int inflate_block(BGZF* fp, int block_length)
Expand All @@ -482,7 +568,11 @@ static int inflate_block(BGZF* fp, int block_length)
// Check CRC of uncompressed block matches the gzip header.
// NB: we may wish to switch out the zlib crc32 for something more performant.
// See PR#361 and issue#467
#ifdef HAVE_LIBDEFLATE
uint32_t c1 = libdeflate_crc32(0L, (unsigned char *)fp->uncompressed_block, dlen);
#else
uint32_t c1 = crc32(0L, (unsigned char *)fp->uncompressed_block, dlen);
#endif
uint32_t c2 = le_to_u32((uint8_t *)fp->compressed_block + block_length-8);
if (c1 != c2) {
fp->errcode |= BGZF_ERR_CRC;
Expand Down Expand Up @@ -1160,7 +1250,7 @@ static void *bgzf_mt_reader(void *vp) {
pthread_cond_signal(&mt->command_c);
pthread_mutex_unlock(&mt->command_m);
hts_tpool_process_destroy(mt->out_queue);
pthread_exit(NULL);
return NULL;

default:
break;
Expand All @@ -1182,7 +1272,7 @@ static void *bgzf_mt_reader(void *vp) {
// We tear down the multi-threaded decoder and revert to the old code.
hts_tpool_dispatch(mt->pool, mt->out_queue, bgzf_nul_func, j);
hts_tpool_process_ref_decr(mt->out_queue);
pthread_exit(&j->errcode);
return &j->errcode;
}

// Dispatch an empty block so EOF is spotted.
Expand All @@ -1193,7 +1283,7 @@ static void *bgzf_mt_reader(void *vp) {
hts_tpool_dispatch(mt->pool, mt->out_queue, bgzf_nul_func, j);
if (j->errcode != 0) {
hts_tpool_process_destroy(mt->out_queue);
pthread_exit(&j->errcode);
return &j->errcode;
}

// We hit EOF so can stop reading, but we may get a subsequent
Expand Down Expand Up @@ -1224,10 +1314,9 @@ static void *bgzf_mt_reader(void *vp) {
pthread_cond_signal(&mt->command_c);
pthread_mutex_unlock(&mt->command_m);
hts_tpool_process_destroy(mt->out_queue);
pthread_exit(NULL);
return NULL;
}
}
return NULL;
}

int bgzf_thread_pool(BGZF *fp, hts_tpool *pool, int qsize) {
Expand Down Expand Up @@ -1452,7 +1541,7 @@ ssize_t bgzf_block_write(BGZF *fp, const void *data, size_t length)
uint64_t ublock_size; // amount of uncompressed data to be fed into next block
while (remaining > 0) {
current_block = fp->idx->moffs - fp->idx->noffs;
ublock_size = fp->idx->offs[current_block+1].uaddr-fp->idx->offs[current_block].uaddr;
ublock_size = current_block + 1 < fp->idx->moffs ? fp->idx->offs[current_block+1].uaddr-fp->idx->offs[current_block].uaddr : BGZF_MAX_BLOCK_SIZE;
uint8_t* buffer = (uint8_t*)fp->uncompressed_block;
int copy_length = ublock_size - fp->block_offset;
if (copy_length > remaining) copy_length = remaining;
Expand All @@ -1462,7 +1551,8 @@ ssize_t bgzf_block_write(BGZF *fp, const void *data, size_t length)
remaining -= copy_length;
if (fp->block_offset == ublock_size) {
if (lazy_flush(fp) != 0) return -1;
fp->idx->noffs--; // decrement noffs to track the blocks
if (fp->idx->noffs > 0)
fp->idx->noffs--; // decrement noffs to track the blocks
}
}
return length - remaining;
Expand Down
Loading

0 comments on commit be22a2a

Please sign in to comment.