Skip to content

Commit 9d8f88e

Browse files
committed
Fix off-by-one error for VCF regions
1 parent 94c84dd commit 9d8f88e

File tree

2 files changed

+16
-3
lines changed

2 files changed

+16
-3
lines changed

vcztools/regions.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,20 @@ def parse_targets_string(targets: str) -> tuple[str, Optional[int], Optional[int
1818
return contig, None, None
1919

2020

21-
def pslice_to_slice(
21+
def region_to_slice(
2222
all_contigs: List[str],
2323
variant_contig: Any,
2424
variant_position: Any,
2525
contig: str,
2626
start: Optional[int] = None,
2727
end: Optional[int] = None,
2828
) -> slice:
29+
"""Convert a VCF region to a Python slice."""
30+
31+
# subtract one from start since VCF is 1-based (fully closed),
32+
# but Python slices are 0-based (half open)
33+
start = None if start is None else start - 1
34+
2935
contig_index = all_contigs.index(contig)
3036
contig_range = np.searchsorted(variant_contig, [contig_index, contig_index + 1])
3137

vcztools/vcf_writer.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import MutableMapping, Optional, TextIO, Union
66

77
import numpy as np
8-
from vcztools.regions import parse_targets_string, pslice_to_slice
8+
from vcztools.regions import parse_targets_string, region_to_slice
99
import zarr
1010

1111
from . import _vcztools
@@ -168,7 +168,14 @@ def write_vcf(
168168
variant_mask = np.ones(pos.shape[0], dtype=bool)
169169
else:
170170
contig, start, end = parse_targets_string(variant_targets)
171-
variant_slice = pslice_to_slice(root["contig_id"][:].astype("U").tolist(), root["variant_contig"], pos, contig, start, end)
171+
variant_slice = region_to_slice(
172+
root["contig_id"][:].astype("U").tolist(),
173+
root["variant_contig"],
174+
pos,
175+
contig,
176+
start,
177+
end,
178+
)
172179
variant_mask = np.zeros(pos.shape[0], dtype=bool)
173180
variant_mask[variant_slice] = 1
174181
# Use zarr arrays to get mask chunks aligned with the main data

0 commit comments

Comments
 (0)