Skip to content

Commit 7bcd8bf

Browse files
Replace BioCore with BioGenerics (#14)
Replace BioCore dependency with BioGenerics. * ReaderHelper functionality that was part of BioCore but not BioGenerics is moved into VariantCallFormat. * Move files needed for unit testing into VariantCallFormat repo. * Patch bump.
1 parent aa7ca71 commit 7bcd8bf

30 files changed

+587
-40
lines changed

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
name = "VariantCallFormat"
22
uuid = "28eba6e3-a997-4ad9-87c6-d933b8bca6c1"
33
authors = ["Rasmus Henningsson <rasmus.henningsson@gmail.com>", "Lund University"]
4-
version = "0.5.5"
4+
version = "0.5.6"
55

66
[deps]
77
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
88
BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
9-
BioCore = "37cfa864-2cd6-5c12-ad9e-b6597d696c81"
9+
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
1010
BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
1111

1212
[compat]
1313
Automa = "0.7, =0.8.0, 0.8.2"
1414
BGZFStreams = "0.3"
15-
BioCore = "2.0.5"
15+
BioGenerics = "0.1"
1616
BufferedStreams = "1"
1717
julia = "1"
1818

src/ReaderHelper.jl

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
# Reader Helper
2+
# =============
3+
#
4+
# Utilities to generate file readers in BioJulia packages.
5+
#
6+
# This file is a part of BioJulia.
7+
# License is MIT: https://github.com/BioJulia/BioCore.jl/blob/master/LICENSE.md
8+
9+
module ReaderHelper
10+
11+
import Automa
12+
import BufferedStreams
13+
14+
"""
    State{T<:BufferedStreams.BufferedInputStream}

Mutable parser state that is carried between successive calls of a generated
reader function.

# Fields
- `stream`: the input stream.
- `cs`: the current DFA state of the parsing machine.
- `linenum`: the current line number; the parser is responsible for updating it.
- `finished`: `true` once parsing has finished, regardless of where in the
  stream we are.
"""
mutable struct State{T<:BufferedStreams.BufferedInputStream}
    stream::T
    cs::Int
    linenum::Int
    finished::Bool
end

"""
    State(initstate::Int, input::BufferedStreams.BufferedInputStream)

Create a `State` wrapping `input` that starts in DFA state `initstate`, at line
number 1, and is not finished.
"""
State(initstate::Int, input::BufferedStreams.BufferedInputStream) =
    State(input, initstate, 1, false)
24+
25+
26+
27+
"""
    anchor!(stream, p, immobilize = true)

Record `p` as the anchor of `stream` and set the stream's `immobilized` flag to
`immobilize`. Returns `stream`.
"""
@inline function anchor!(stream::BufferedStreams.BufferedInputStream, p, immobilize = true)
    # The anchor is assigned first, then the flag, as two field writes.
    stream.anchor, stream.immobilized = p, immobilize
    return stream
end
32+
33+
"""
    upanchor!(stream)

Clear the anchor of `stream` (reset it to 0 and clear the `immobilized` flag)
and return the anchor value that was set. Asserts that an anchor was set, i.e.
that the stored anchor is non-zero.
"""
@inline function upanchor!(stream::BufferedStreams.BufferedInputStream)
    previous = stream.anchor
    @assert previous != 0 "upanchor! called with no anchor set"
    stream.anchor = 0
    stream.immobilized = false
    return previous
end
40+
41+
"""
    ensure_margin!(stream)

Call `BufferedStreams.shiftdata!` on `stream` when its read position lies beyond
19/20 of the buffer length. Returns `nothing`.
"""
function ensure_margin!(stream::BufferedStreams.BufferedInputStream)
    # Integer form of `position / length(buffer) > 19 / 20`, avoiding floats.
    past_threshold = stream.position * 20 > length(stream.buffer) * 19
    past_threshold && BufferedStreams.shiftdata!(stream)
    return nothing
end
47+
48+
"""
    resize_and_copy!(dst, src, r)

Make `dst` exactly `length(r)` bytes long and fill it with `src[r]`.
Equivalent to `resize_and_copy!(dst, 1, src, r)`. Returns `dst`.
"""
@inline function resize_and_copy!(dst::Vector{UInt8}, src::Vector{UInt8}, r::UnitRange{Int})
    return resize_and_copy!(dst, 1, src, r)
end

"""
    resize_and_copy!(dst, dstart, src, r)

Resize `dst` so that it ends right after the copied data (its new length is
`dstart + length(r) - 1`) and copy `src[r]` into `dst` starting at index
`dstart`. Bytes of `dst` before `dstart` are left as they are. Returns `dst`.
"""
@inline function resize_and_copy!(dst::Vector{UInt8}, dstart::Int, src::Vector{UInt8}, r::UnitRange{Int})
    nbytes = length(r)
    newlen = dstart + nbytes - 1
    # Only touch the vector's size when it actually has to change.
    length(dst) == newlen || resize!(dst, newlen)
    copyto!(dst, dstart, src, first(r), nbytes)
    return dst
end
60+
61+
"""
    append_from_anchor!(dst, dstart, stream, p)

Clear the anchor of `stream` via `upanchor!` and copy the bytes of the stream's
buffer from the former anchor position up to and including `p` into `dst`,
starting at index `dstart`. `dst` is resized by `resize_and_copy!`, whose result
is returned.
"""
@inline function append_from_anchor!(dst::Vector{UInt8}, dstart::Int, stream::BufferedStreams.BufferedInputStream, p::Int)
    first_byte = upanchor!(stream)
    return resize_and_copy!(dst, dstart, stream.buffer, first_byte:p)
end
64+
65+
"""
    generate_index_function(record_type, machine, init_code, actions; kwargs...)

Return an expression that defines `index!(record::record_type)`.

The generated function runs the code produced by `Automa.generate_exec_code`
for `machine` and `actions` over `record.data`, after calling
`initialize!(record)` and splicing in `init_code`. It throws an `ArgumentError`
if the machine does not end in state 0, and otherwise returns `record`.

# Keywords
- `generator`: code generator passed to `Automa.CodeGenContext` (default `:goto`).
- `checkbounds`: passed to `Automa.CodeGenContext` (default `false`).
- `loopunroll`: passed to `Automa.CodeGenContext` (default `0`).
"""
function generate_index_function(record_type, machine, init_code, actions; kwargs...)
    options = Dict(kwargs)
    codegen = get(options, :generator, :goto)
    boundscheck = get(options, :checkbounds, false)
    unroll = get(options, :loopunroll, 0)
    context = Automa.CodeGenContext(
        generator = codegen,
        checkbounds = boundscheck,
        loopunroll = unroll,
    )
    start_state = machine.start_state
    exec_code = Automa.generate_exec_code(context, machine, actions)
    return quote
        function index!(record::$(record_type))
            data = record.data
            p = 1
            # The whole record is in memory, so the end of data is also EOF.
            p_end = p_eof = sizeof(data)
            initialize!(record)
            $(init_code)
            cs = $(start_state)
            $(exec_code)
            # Any state other than 0 means the record was not fully accepted;
            # report it with up to 8 bytes of context from the stop position.
            cs == 0 || throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)])))))
            @assert isfilled(record)
            return record
        end
    end
end
89+
90+
"""
    generate_readheader_function(reader_type, metainfo_type, machine, init_code, actions, finish_code=:())

Return an expression that defines `readheader!(reader::reader_type)` and its
worker `_readheader!(reader, state::ReaderHelper.State)`.

The worker repeatedly runs the table-based code generated by Automa for
`machine` and `actions` on the buffer of `state.stream`, writing the machine
state, line number and stream position back after every run. The loop ends when
the variable `finish_header` has become `true` (presumably set by one of the
`actions`), in which case `finish_code` is executed first.

The generated code raises an error when the machine enters a negative (error)
state, or when the input ends before the header is complete.
"""
function generate_readheader_function(reader_type, metainfo_type, machine, init_code, actions, finish_code=:())
    quote
        function readheader!(reader::$(reader_type))
            _readheader!(reader, reader.state)
        end

        function _readheader!(reader::$(reader_type), state::ReaderHelper.State)
            stream = state.stream
            ReaderHelper.ensure_margin!(stream)
            cs = state.cs
            linenum = state.linenum
            data = stream.buffer
            p = stream.position
            p_end = stream.available
            # -1 means "end of input not seen yet"; it is replaced by the final
            # p_end once refilling the buffer yields no more bytes.
            p_eof = -1
            finish_header = false
            # Not used directly below; presumably filled in by `actions`.
            record = $(metainfo_type)()

            $(init_code)

            while true
                $(Automa.generate_exec_code(Automa.CodeGenContext(generator=:table), machine, actions))

                # Persist progress so that a later call resumes from here.
                state.cs = cs
                state.finished = cs == 0
                state.linenum = linenum
                stream.position = p

                if cs < 0
                    error("$($(reader_type)) file format error on line ", linenum)
                elseif finish_header
                    $(finish_code)
                    break
                elseif p > p_eof >= 0
                    # End of input was reached (p_eof >= 0) and all of it has
                    # been consumed, yet the header is not finished.
                    error("incomplete $($(reader_type)) input on line ", linenum)
                else
                    hits_eof = BufferedStreams.fillbuffer!(stream) == 0
                    p = stream.position
                    p_end = stream.available
                    if hits_eof
                        p_eof = p_end
                    end
                end
            end
        end
    end
end
137+
138+
"""
    generate_read_function(reader_type, machine, init_code, actions; kwargs...)

Return an expression that defines `Base.read!(reader::reader_type, record)` and
its worker `_read!(reader, state::ReaderHelper.State, record)`, where `record`
is of type `eltype(reader_type)`.

The worker repeatedly runs the code generated by Automa for `machine` and
`actions` on the buffer of `state.stream`, writing the machine state, line
number and stream position back after every run. The loop ends when the
variable `found_record` has become `true` (presumably set by one of the
`actions`), after which `record` is returned.

The generated code throws `EOFError` when the reader is already finished or the
machine reaches state 0 without a record. It raises an error when the machine
enters a negative (error) state, or when the input ends in the middle of a
record.

# Keywords
- `generator`: code generator passed to `Automa.CodeGenContext` (default `:goto`).
- `checkbounds`: passed to `Automa.CodeGenContext` (default `false`).
- `loopunroll`: passed to `Automa.CodeGenContext` (default `0`).
"""
function generate_read_function(reader_type, machine, init_code, actions; kwargs...)
    kwargs = Dict(kwargs)
    context = Automa.CodeGenContext(
        generator=get(kwargs, :generator, :goto),
        checkbounds=get(kwargs, :checkbounds, false),
        loopunroll=get(kwargs, :loopunroll, 0)
    )
    quote
        function Base.read!(reader::$(reader_type), record::eltype($(reader_type)))::eltype($(reader_type))
            return _read!(reader, reader.state, record)
        end

        function _read!(reader::$(reader_type), state::ReaderHelper.State, record::eltype($(reader_type)))
            stream = state.stream
            ReaderHelper.ensure_margin!(stream)
            cs = state.cs
            linenum = state.linenum
            data = stream.buffer
            p = stream.position
            p_end = stream.available
            # -1 means "end of input not seen yet"; it is replaced by the final
            # p_end once refilling the buffer yields no more bytes.
            p_eof = -1
            found_record = false
            initialize!(record)

            $(init_code)

            if state.finished
                throw(EOFError())
            end

            while true
                $(Automa.generate_exec_code(context, machine, actions))

                # Persist progress so that the next call resumes from here.
                state.cs = cs
                state.finished |= cs == 0
                state.linenum = linenum
                stream.position = p

                if cs < 0
                    error($(reader_type), " file format error on line ", linenum, " ~>", repr(String(data[p:min(p+7,p_end)])))
                elseif found_record
                    break
                elseif cs == 0
                    throw(EOFError())
                elseif p > p_eof >= 0
                    # End of input was reached (p_eof >= 0) and all of it has
                    # been consumed, yet no complete record was found.
                    error("incomplete $($(reader_type)) input on line ", linenum)
                elseif BufferedStreams.available_bytes(stream) < 64
                    hits_eof = BufferedStreams.fillbuffer!(stream) == 0
                    p = stream.position
                    p_end = stream.available
                    if hits_eof
                        p_eof = p_end
                    end
                end
            end

            @assert isfilled(record)
            return record
        end
    end
end
199+
200+
end

src/VariantCallFormat.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@ module VariantCallFormat
22

33
import Automa
44
import Automa.RegExp: @re_str
5-
import BioCore
6-
import BioCore: isfilled, metainfotag, metainfoval, header
7-
import BioCore.Exceptions: missingerror
5+
import BioGenerics: BioGenerics, isfilled, metainfotag, metainfoval, header
6+
import BioGenerics.Exceptions: missingerror, MissingFieldException
87
import BufferedStreams
98

109
export
@@ -16,6 +15,8 @@ export
1615
isfilled,
1716
MissingFieldException
1817

18+
include("ReaderHelper.jl")
19+
1920
include("record.jl")
2021
include("metainfo.jl")
2122
include("header.jl")

src/bcf/bcf.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module BCF
22

3-
import BioCore: BioCore, isfilled, header
3+
import BioGenerics: BioGenerics, isfilled, header
44
import VariantCallFormat: VCF
55
import BGZFStreams
66
import BufferedStreams

src/bcf/reader.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
struct Reader{T<:IO} <: BioCore.IO.AbstractReader
1+
struct Reader{T<:IO} <: BioGenerics.IO.AbstractReader
22
version::Tuple{UInt8,UInt8} # (major, minor)
33
header::VCF.Header
44
stream::BGZFStreams.BGZFStream{T}
@@ -40,7 +40,7 @@ function Base.eltype(::Type{Reader{T}}) where T
4040
return Record
4141
end
4242

43-
function BioCore.IO.stream(reader::Reader)
43+
function BioGenerics.IO.stream(reader::Reader)
4444
return reader.stream
4545
end
4646

src/bcf/writer.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
struct Writer{T<:IO} <: BioCore.IO.AbstractWriter
1+
struct Writer{T<:IO} <: BioGenerics.IO.AbstractWriter
22
stream::BGZFStreams.BGZFStream{T}
33
end
44

@@ -24,7 +24,7 @@ function Writer(output::IO, header::VCF.Header)
2424
return Writer(stream)
2525
end
2626

27-
function BioCore.IO.stream(writer::Writer)
27+
function BioGenerics.IO.stream(writer::Writer)
2828
return writer.stream
2929
end
3030

src/header.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ end
5151

5252
function Base.show(io::IO, header::Header)
5353
println(io, summary(header), ':')
54-
tags = BioCore.metainfotag.(header.metainfo)
54+
tags = metainfotag.(header.metainfo)
5555
println(io, " metainfo tags: ", join(unique(tags), ' '))
5656
print(io, " sample IDs: ", join(header.sampleID, ' '))
5757
end

src/metainfo.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,12 @@ function isequaltag(metainfo::MetaInfo, tag::AbstractString)
124124
memcmp(pointer(metainfo.data, first(metainfo.tag)), pointer(tag), length(metainfo.tag)) == 0
125125
end
126126

127-
function BioCore.metainfotag(metainfo::MetaInfo)
127+
function BioGenerics.metainfotag(metainfo::MetaInfo)
128128
checkfilled(metainfo)
129129
return String(metainfo.data[metainfo.tag])
130130
end
131131

132-
function BioCore.metainfoval(metainfo::MetaInfo)
132+
function BioGenerics.metainfoval(metainfo::MetaInfo)
133133
checkfilled(metainfo)
134134
return String(metainfo.data[metainfo.val])
135135
end
@@ -182,14 +182,14 @@ function Base.show(io::IO, metainfo::MetaInfo)
182182
print(io, summary(metainfo), ':')
183183
if isfilled(metainfo)
184184
println(io)
185-
println(io, " tag: ", BioCore.metainfotag(metainfo))
185+
println(io, " tag: ", metainfotag(metainfo))
186186
print(io, " value:")
187187
if metainfo.dict
188188
for (key, val) in zip(keys(metainfo), values(metainfo))
189189
print(io, ' ', key, "=\"", val, '"')
190190
end
191191
else
192-
print(io, ' ', BioCore.metainfoval(metainfo))
192+
print(io, ' ', metainfoval(metainfo))
193193
end
194194
else
195195
print(io, " <not filled>")

0 commit comments

Comments
 (0)