From f8d431e8f2f69b32834074d8502e20a4f2fba3d4 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Wed, 28 May 2025 21:25:09 +0100 Subject: [PATCH 1/4] Update changelog Closes #226 --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index af41c91..96f790e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +## [0.1.0] - 2025-05-29 + +Improvements: + +- Support filtering by FILTER (#217), CHROM (#223) and general string values (#220) +- Support regions (-r/-t), filter expressions (-i/-e) and samples (-s) in query command (#205) +- Various improvements to support VCZ datasets produced from tskit and plink files by bio2zarr. +- Use a fully dynamically generated header via ``vcf_meta_information`` attributes +(#208). Requires vcf-zarr version >= 0.4 (bio2zarr >= 0.1.6) to fully recover the original +header. +- Add --version (#197) + +Breaking: + +- Update minimum Click version to 8.2.0 (#206) + ## [0.0.2] - 2025-04-04 Important bugfixes for filtering language and sample subsetting. From b0636832b705bbbc588b57860f2ce10c76cba246 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Wed, 28 May 2025 21:47:51 +0100 Subject: [PATCH 2/4] Document CLI commands Closes #161 --- vcztools/cli.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/vcztools/cli.py b/vcztools/cli.py index b84b947..c3fa30c 100644 --- a/vcztools/cli.py +++ b/vcztools/cli.py @@ -111,6 +111,11 @@ def list_commands(self, ctx): ) @handle_exception def index(path, nrecords, stats): + """ + Query the number of records in a VCZ dataset. This subcommand only + implements the --nrecords and --stats options and does not build any + indexes. + """ if nrecords and stats: raise click.UsageError("Expected only one of --stats or --nrecords options") if nrecords: @@ -156,6 +161,15 @@ def query( include, exclude, ): + """ + Transform VCZ into user-defined formats with efficient subsetting and + filtering. Intended as a drop-in replacement for bcftools query, where we + replace the VCF file path with a VCZ dataset URL. + + This is an early version an not feature complete: if you are missing a + particular piece of functionality please open and issue at + https://github.com/sgkit-dev/vcztools/issues + """ if list_samples: # bcftools query -l ignores the --output option and always writes to stdout output = sys.stdout @@ -241,6 +255,15 @@ def view( include, exclude, ): + """ + Convert VCZ dataset to VCF with efficient subsetting and filtering. + Intended as a drop-in replacement for bcftools view, where + we replace the VCF file path with a VCZ dataset URL. + + This is an early version an not feature complete: if you are missing a + particular piece of functionality please open and issue at + https://github.com/sgkit-dev/vcztools/issues + """ suffix = output.name.split(".")[-1] # Exclude suffixes which require bgzipped or BCF output: # https://github.com/samtools/htslib/blob/329e7943b7ba3f0af15b0eaa00a367a1ac15bd83/vcf.c#L3815 From 4c32d9baf62b616582fcc5fde2051dcab852b18c Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Wed, 28 May 2025 21:52:20 +0100 Subject: [PATCH 3/4] Add pypi download badge --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 67462e6..48fbe2f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ [![CI](https://github.com/sgkit-dev/vcztools/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/sgkit-dev/vcztools/actions/workflows/ci.yml) +[![PyPI Downloads](https://static.pepy.tech/badge/vcztools)](https://pepy.tech/projects/vcztools) # vcztools Partial reimplementation of bcftools for [VCF Zarr](https://github.com/sgkit-dev/vcf-zarr-spec/) @@ -17,16 +18,16 @@ python3 -m pip install vcztools ``` vcztools view ``` -or +or ``` python -m vcztools view ``` -should be equivalent to running +should be equivalent to running ``` bcftools view ``` -See the [bio2zarr](https://sgkit-dev.github.io/bio2zarr/) project for help in +See the [bio2zarr](https://sgkit-dev.github.io/bio2zarr/) project for help in converting VCF files to Zarr. ## Goals @@ -34,7 +35,7 @@ converting VCF files to Zarr. Vcztools aims to be a drop-in replacement for a subset of bcftools functionality. Currently supported are the ``view``, ``query`` and ``index -s/-n`` commands. -We aim for 100% compatibility so if you notice a difference between the output of +We aim for 100% compatibility so if you notice a difference between the output of vcztools and bcftools please do open an issue. ## Cloud stores @@ -62,6 +63,6 @@ python -m vcztools view s3:///path/to.vcz ## Development -Vcztools is under active development and contributions are warmly welcomed. Please +Vcztools is under active development and contributions are warmly welcomed. Please see the project on [GitHub](https://github.com/sgkit-dev/vcztools). From 4eb4fdaad48f071fa503a45bc1b975197aa06e02 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Thu, 29 May 2025 09:13:24 +0100 Subject: [PATCH 4/4] Apply suggestions from code review Co-authored-by: Tom White --- vcztools/cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vcztools/cli.py b/vcztools/cli.py index c3fa30c..4311cbc 100644 --- a/vcztools/cli.py +++ b/vcztools/cli.py @@ -166,8 +166,8 @@ def query( filtering. Intended as a drop-in replacement for bcftools query, where we replace the VCF file path with a VCZ dataset URL. - This is an early version an not feature complete: if you are missing a - particular piece of functionality please open and issue at + This is an early version and not feature complete: if you are missing a + particular piece of functionality please open an issue at https://github.com/sgkit-dev/vcztools/issues """ if list_samples: @@ -260,8 +260,8 @@ def view( Intended as a drop-in replacement for bcftools view, where we replace the VCF file path with a VCZ dataset URL. - This is an early version an not feature complete: if you are missing a - particular piece of functionality please open and issue at + This is an early version and not feature complete: if you are missing a + particular piece of functionality please open an issue at https://github.com/sgkit-dev/vcztools/issues """ suffix = output.name.split(".")[-1]