|
| 1 | +# Reader Helper |
| 2 | +# ============= |
| 3 | +# |
| 4 | +# Utilities to generate file readers in BioJulia packages. |
| 5 | +# |
| 6 | +# This file is a part of BioJulia. |
| 7 | +# License is MIT: https://github.com/BioJulia/BioCore.jl/blob/master/LICENSE.md |
| 8 | + |
| 9 | +module ReaderHelper |
| 10 | + |
| 11 | +import Automa |
| 12 | +import BufferedStreams |
| 13 | + |
"""
    State{T<:BufferedStreams.BufferedInputStream}

Mutable parsing state carried between calls of the generated reader functions.

# Fields
- `stream`: the buffered input stream being parsed.
- `cs`: current DFA state of the parsing machine.
- `linenum`: current line number; the parser is responsible for updating it.
- `finished`: `true` once parsing has finished, regardless of where in the
  stream the read position is.
"""
mutable struct State{T<:BufferedStreams.BufferedInputStream}
    stream::T
    cs::Int
    linenum::Int
    finished::Bool
end
| 20 | + |
"""
    State(initstate::Int, input::BufferedStreams.BufferedInputStream)

Create a `State` wrapping `input` whose machine state is `initstate`, with the
line number set to 1 and the `finished` flag set to `false`.
"""
State(initstate::Int, input::BufferedStreams.BufferedInputStream) =
    State(input, initstate, 1, false)
| 24 | + |
| 25 | + |
| 26 | + |
"""
    anchor!(stream, p, immobilize = true)

Store `p` in the `anchor` field of `stream` and `immobilize` in its
`immobilized` field, then return `stream`.
"""
@inline function anchor!(stream::BufferedStreams.BufferedInputStream, p, immobilize = true)
    # The anchor is written first, then the immobilization flag.
    stream.anchor = p
    stream.immobilized = immobilize
    return stream
end
| 32 | + |
"""
    upanchor!(stream)

Reset the `anchor` field of `stream` to 0, clear its `immobilized` flag, and
return the anchor value that was set before the call. An anchor must be set
(non-zero) when this is called.
"""
@inline function upanchor!(stream::BufferedStreams.BufferedInputStream)
    previous = stream.anchor
    @assert previous != 0 "upanchor! called with no anchor set"
    stream.anchor = 0
    stream.immobilized = false
    return previous
end
| 40 | + |
"""
    ensure_margin!(stream)

Call `BufferedStreams.shiftdata!` on `stream` when its read position has moved
past 19/20 of the buffer length. Always return `nothing`.
"""
function ensure_margin!(stream::BufferedStreams.BufferedInputStream)
    # Integer form of `position / length(buffer) > 19 / 20`.
    nearly_exhausted = stream.position * 20 > length(stream.buffer) * 19
    nearly_exhausted && BufferedStreams.shiftdata!(stream)
    return nothing
end
| 47 | + |
"""
    resize_and_copy!(dst, src, r)

Copy `src[r]` into `dst` starting at index 1, resizing `dst` so that it holds
exactly `length(r)` bytes. Return `dst`.
"""
@inline resize_and_copy!(dst::Vector{UInt8}, src::Vector{UInt8}, r::UnitRange{Int}) =
    resize_and_copy!(dst, 1, src, r)
| 51 | + |
"""
    resize_and_copy!(dst, dstart, src, r)

Copy `src[r]` into `dst` beginning at index `dstart`. `dst` is resized (grown
or truncated) so that its last element is the last copied byte, i.e. its length
becomes `dstart + length(r) - 1`. Return `dst`.
"""
@inline function resize_and_copy!(
    dst::Vector{UInt8},
    dstart::Int,
    src::Vector{UInt8},
    r::UnitRange{Int}
)
    nbytes = length(r)
    newlen = dstart + nbytes - 1
    # Only touch the length of `dst` when it actually has to change.
    length(dst) == newlen || resize!(dst, newlen)
    copyto!(dst, dstart, src, first(r), nbytes)
    return dst
end
| 60 | + |
"""
    append_from_anchor!(dst, dstart, stream, p)

Copy the bytes of `stream.buffer` from the currently anchored position through
`p` into `dst` starting at index `dstart`, resizing `dst` to end at the last
copied byte. The anchor of `stream` is cleared in the process (see
`upanchor!`). Return `dst`.
"""
@inline function append_from_anchor!(
    dst::Vector{UInt8},
    dstart::Int,
    stream::BufferedStreams.BufferedInputStream,
    p::Int
)
    buffer = stream.buffer
    start = upanchor!(stream)
    return resize_and_copy!(dst, dstart, buffer, start:p)
end
| 64 | + |
"""
    generate_index_function(record_type, machine, init_code, actions; kwargs...)

Return an expression that defines `index!(record::record_type)`.

The generated function runs `machine` (with `actions`) over `record.data`
after calling `initialize!(record)` and splicing in `init_code`. It throws an
`ArgumentError` when the machine does not end in state 0, and otherwise
returns `record`.

# Keywords
- `generator` (default `:goto`), `checkbounds` (default `false`) and
  `loopunroll` (default `0`) are passed to `Automa.CodeGenContext`. Any other
  keyword is ignored.

NOTE(review): `initialize!` and `isfilled` are referenced unqualified in the
generated code, so presumably they must be defined where the expression is
evaluated; confirm against callers.
"""
function generate_index_function(record_type, machine, init_code, actions; kwargs...)
    options = Dict(kwargs)
    context = Automa.CodeGenContext(
        generator = get(options, :generator, :goto),
        checkbounds = get(options, :checkbounds, false),
        loopunroll = get(options, :loopunroll, 0)
    )
    start_state = machine.start_state
    exec_code = Automa.generate_exec_code(context, machine, actions)
    return quote
        function index!(record::$(record_type))
            # `data`, `p`, `p_end`, `p_eof` and `cs` are the variables the
            # generated machine code operates on.
            data = record.data
            p = 1
            p_end = p_eof = sizeof(data)
            initialize!(record)
            $(init_code)
            cs = $(start_state)
            $(exec_code)
            # A non-zero final state means the whole record was not accepted.
            cs == 0 || throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)])))))
            @assert isfilled(record)
            return record
        end
    end
end
| 89 | + |
"""
    generate_readheader_function(reader_type, metainfo_type, machine, init_code, actions, finish_code=:())

Return an expression that defines `readheader!(reader::reader_type)` and its
worker `_readheader!(reader, state::ReaderHelper.State)`.

The worker repeatedly runs `machine` (table-based code generation) over the
buffer of `state.stream`, refilling the buffer between runs, until the
spliced-in code sets `finish_header` to `true`; `finish_code` is then executed
and the loop ends. After every run the machine state, line number and stream
position are written back to `state` and the stream. An error is raised when
the machine enters a negative state or when the input ends before the header
is complete.
"""
function generate_readheader_function(reader_type, metainfo_type, machine, init_code, actions, finish_code=:())
    context = Automa.CodeGenContext(generator=:table)
    exec_code = Automa.generate_exec_code(context, machine, actions)
    return quote
        function readheader!(reader::$(reader_type))
            _readheader!(reader, reader.state)
        end

        function _readheader!(reader::$(reader_type), state::ReaderHelper.State)
            stream = state.stream
            ReaderHelper.ensure_margin!(stream)
            cs = state.cs
            linenum = state.linenum
            data = stream.buffer
            p = stream.position
            p_end = stream.available
            p_eof = -1  # negative until the end of the input has been seen
            finish_header = false
            record = $(metainfo_type)()

            $(init_code)

            while true
                $(exec_code)

                # Persist the progress made by this run of the machine.
                state.cs = cs
                state.finished = cs == 0
                state.linenum = linenum
                stream.position = p

                if cs < 0
                    error("$($(reader_type)) file format error on line ", linenum)
                elseif finish_header
                    $(finish_code)
                    break
                elseif p > p_eof ≥ 0
                    error("incomplete $($(reader_type)) input on line ", linenum)
                else
                    # Refill the buffer and resynchronize the local cursors.
                    hits_eof = BufferedStreams.fillbuffer!(stream) == 0
                    p = stream.position
                    p_end = stream.available
                    hits_eof && (p_eof = p_end)
                end
            end
        end
    end
end
| 137 | + |
"""
    generate_read_function(reader_type, machine, init_code, actions; kwargs...)

Return an expression that defines `Base.read!(reader::reader_type, record)` and
its worker `_read!(reader, state::ReaderHelper.State, record)`, where `record`
is of type `eltype(reader_type)`.

The worker calls `initialize!(record)`, splices in `init_code`, and then runs
`machine` (with `actions`) over the buffer of `state.stream` until the
spliced-in code sets `found_record` to `true`; the buffer is refilled when
fewer than 64 bytes are available. After every run the machine state, line
number and stream position are written back to `state` and the stream. An
`EOFError` is thrown when the state is already finished or the machine reaches
state 0 without a record; an error is raised on a negative machine state or
when the input ends in the middle of a record.

# Keywords
- `generator` (default `:goto`), `checkbounds` (default `false`) and
  `loopunroll` (default `0`) are passed to `Automa.CodeGenContext`. Any other
  keyword is ignored.
"""
function generate_read_function(reader_type, machine, init_code, actions; kwargs...)
    options = Dict(kwargs)
    context = Automa.CodeGenContext(
        generator = get(options, :generator, :goto),
        checkbounds = get(options, :checkbounds, false),
        loopunroll = get(options, :loopunroll, 0)
    )
    exec_code = Automa.generate_exec_code(context, machine, actions)
    return quote
        function Base.read!(reader::$(reader_type), record::eltype($(reader_type)))::eltype($(reader_type))
            return _read!(reader, reader.state, record)
        end

        function _read!(reader::$(reader_type), state::ReaderHelper.State, record::eltype($(reader_type)))
            stream = state.stream
            ReaderHelper.ensure_margin!(stream)
            cs = state.cs
            linenum = state.linenum
            data = stream.buffer
            p = stream.position
            p_end = stream.available
            p_eof = -1  # negative until the end of the input has been seen
            found_record = false
            initialize!(record)

            $(init_code)

            if state.finished
                throw(EOFError())
            end

            while true
                $(exec_code)

                # Persist the progress made by this run of the machine.
                state.cs = cs
                state.finished |= cs == 0
                state.linenum = linenum
                stream.position = p

                if cs < 0
                    error($(reader_type), " file format error on line ", linenum, " ~>", repr(String(data[p:min(p+7,p_end)])))
                elseif found_record
                    break
                elseif cs == 0
                    throw(EOFError())
                elseif p > p_eof ≥ 0
                    error("incomplete $($(reader_type)) input on line ", linenum)
                elseif BufferedStreams.available_bytes(stream) < 64
                    # Refill the buffer and resynchronize the local cursors.
                    hits_eof = BufferedStreams.fillbuffer!(stream) == 0
                    p = stream.position
                    p_end = stream.available
                    hits_eof && (p_eof = p_end)
                end
            end

            @assert isfilled(record)
            return record
        end
    end
end
| 199 | + |
| 200 | +end |
0 commit comments