1
1
# GFF3 Reader
2
2
# ===========
3
3
4
- mutable struct Reader <: BioCore .IO.AbstractReader
5
- state:: BioCore.Ragel .State
4
+ mutable struct Reader{S <: TranscodingStream } <: BioGenerics .IO.AbstractReader
5
+ state:: BioGenerics.Automa .State{S}
6
6
index:: Union{Indexes.Tabix, Nothing}
7
7
save_directives:: Bool
8
8
targets:: Vector{Symbol}
@@ -11,8 +11,9 @@ mutable struct Reader <: BioCore.IO.AbstractReader
11
11
directive_count:: Int
12
12
preceding_directive_count:: Int
13
13
14
- function Reader (input:: BufferedStreams.BufferedInputStream , index= nothing , save_directives:: Bool = false , skip_features:: Bool = false , skip_directives:: Bool = true , skip_comments:: Bool = true )
15
- if isa (index, Indexes. Tabix) && ! isa (input. source, BGZFStreams. BGZFStream)
14
+ function Reader (input:: S , index= nothing , save_directives:: Bool = false , skip_features:: Bool = false , skip_directives:: Bool = true , skip_comments:: Bool = true ) where S <: TranscodingStream
15
+
16
+ if isa (index, Indexes. Tabix) && ! isa (input. stream, BGZFStreams. BGZFStream)
16
17
throw (ArgumentError (" not a BGZF stream" ))
17
18
end
18
19
targets = Symbol[]
@@ -25,7 +26,7 @@ mutable struct Reader <: BioCore.IO.AbstractReader
25
26
if ! skip_comments
26
27
push! (targets, :comment )
27
28
end
28
- return new (BioCore . Ragel . State (body_machine. start_state, input ), index, save_directives, targets, false , Record[], 0 , 0 )
29
+ return new {S} (BioGenerics . Automa . State (input, body_machine. start_state, 1 , false ), index, save_directives, targets, false , Record[], 0 , 0 )
29
30
end
30
31
end
31
32
@@ -63,7 +64,15 @@ function Reader(input::IO; index=nothing, save_directives::Bool=false, skip_feat
63
64
if isa (index, AbstractString)
64
65
index = Indexes. Tabix (index)
65
66
end
66
- return Reader (BufferedStreams. BufferedInputStream (input), index, save_directives, skip_features, skip_directives, skip_comments)
67
+
68
+ if isa (input, TranscodingStream)
69
+ return Reader (input, index, save_directives, skip_features, skip_directives, skip_comments)
70
+ end
71
+
72
+ stream = TranscodingStreams. NoopStream (input)
73
+
74
+ return Reader (stream, index, save_directives, skip_features, skip_directives, skip_comments)
75
+
67
76
end
68
77
69
78
function Reader (filepath:: AbstractString ; index= :auto , save_directives:: Bool = false , skip_features:: Bool = false , skip_directives:: Bool = true , skip_comments:: Bool = true )
@@ -85,19 +94,35 @@ function Base.eltype(::Type{<:Reader})
85
94
return Record
86
95
end
87
96
88
"""
    BioGenerics.IO.stream(reader::Reader)

Return the stream stored in `reader`'s parser state.
"""
BioGenerics.IO.stream(reader::Reader) = reader.state.stream
91
100
92
101
"""
    eof(reader::Reader)

Return `true` when the parser state's `filled` flag is set, or otherwise when the
underlying stream reports end-of-file.
"""
function Base.eof(reader::Reader)
    state = reader.state
    if state.filled
        return true
    end
    return eof(state.stream)
end
95
104
96
105
"""
    close(reader::Reader)

Move the pending directive count into `directive_count`, reset the pending count to
zero, and close the stream owned by `reader`.
"""
function Base.close(reader::Reader)
    # Make trailing directives accessible.
    pending = reader.preceding_directive_count
    reader.directive_count = pending
    reader.preceding_directive_count = 0
    return close(BioGenerics.IO.stream(reader))
end
111
+
112
"""
    read!(reader::Reader, record::Record)

Read into `record` by delegating to `readrecord!` on `reader`'s stream, and return
whatever `readrecord!` returns.
"""
function Base.read!(reader::Reader, record::Record)
    input = reader.state.stream
    return readrecord!(input, reader, record)
end
115
+
116
"""
    index!(record::Record)

Wrap `record.data` in an `IOBuffer` behind a `NoopStream` and pass it to the
two-argument `index!(stream, record)`.
"""
function index!(record::Record)
    buffer = IOBuffer(record.data)
    return index!(TranscodingStreams.NoopStream(buffer), record)
end
120
+
121
"""
    iterate(reader::Reader, nextone::Record = Record())

Try to read the next record into `nextone`. Return `nothing` when `tryread!` yields
`nothing`; otherwise return a copy of the record together with the emptied `nextone`,
which serves as the iteration state.
"""
function Base.iterate(reader::Reader, nextone::Record = Record())
    outcome = BioGenerics.IO.tryread!(reader, nextone)
    outcome === nothing && return nothing
    # Copy before emptying: the scratch record is reused as the iteration state.
    item = copy(nextone)
    return item, empty!(nextone)
end
102
127
103
128
function IntervalCollection (reader:: Reader )
@@ -148,6 +173,14 @@ function getfasta(reader::Reader) #TODO: move responsibility to FASTX.jl.
148
173
return FASTA. Reader (reader. state. stream)
149
174
end
150
175
176
"""
    appendfrom!(dst, dpos, src, spos, n)

Copy `n` elements of `src`, starting at index `spos`, into `dst` starting at index
`dpos`. `dst` is grown first when it is shorter than `dpos + n - 1`. Returns `dst`.

The copy uses `unsafe_copyto!`, so the source range is not bounds-checked here.
"""
function appendfrom!(dst, dpos, src, spos, n)
    needed = dpos + n - 1
    length(dst) < needed && resize!(dst, needed)
    unsafe_copyto!(dst, dpos, src, spos, n)
    return dst
end
183
+
151
184
const record_machine, body_machine = (function ()
152
185
cat = Automa. RegExp. cat
153
186
rep = Automa. RegExp. rep
@@ -157,27 +190,27 @@ const record_machine, body_machine = (function ()
157
190
158
191
feature = let
159
192
seqid = re " [a-zA-Z0-9.:^*$@!+_?\- |%]*"
160
- seqid. actions[:enter ] = [:mark ]
193
+ seqid. actions[:enter ] = [:pos ]
161
194
seqid. actions[:exit ] = [:feature_seqid ]
162
195
163
196
source = re " [ -~]*"
164
- source. actions[:enter ] = [:mark ]
197
+ source. actions[:enter ] = [:pos ]
165
198
source. actions[:exit ] = [:feature_source ]
166
199
167
200
type_ = re " [ -~]*"
168
- type_. actions[:enter ] = [:mark ]
201
+ type_. actions[:enter ] = [:pos ]
169
202
type_. actions[:exit ] = [:feature_type_ ]
170
203
171
204
start = re " [0-9]+|\. "
172
- start. actions[:enter ] = [:mark ]
205
+ start. actions[:enter ] = [:pos ]
173
206
start. actions[:exit ] = [:feature_start ]
174
207
175
208
end_ = re " [0-9]+|\. "
176
- end_. actions[:enter ] = [:mark ]
209
+ end_. actions[:enter ] = [:pos ]
177
210
end_. actions[:exit ] = [:feature_end_ ]
178
211
179
212
score = re " [ -~]*[0-9][ -~]*|\. "
180
- score. actions[:enter ] = [:mark ]
213
+ score. actions[:enter ] = [:pos ]
181
214
score. actions[:exit ] = [:feature_score ]
182
215
183
216
strand = re " [+\- ?]|\. "
@@ -189,7 +222,7 @@ const record_machine, body_machine = (function ()
189
222
attributes = let
190
223
char = re " [^=;,\t\r\n ]"
191
224
key = rep1 (char)
192
- key. actions[:enter ] = [:mark ]
225
+ key. actions[:enter ] = [:pos ]
193
226
key. actions[:exit ] = [:feature_attribute_key ]
194
227
val = rep (char)
195
228
attr = cat (key, ' =' , val, rep (cat (' ,' , val)))
@@ -216,7 +249,7 @@ const record_machine, body_machine = (function ()
216
249
comment. actions[:exit ] = [:comment ]
217
250
218
251
record = alt (feature, directive, comment)
219
- record. actions[:enter ] = [:anchor ]
252
+ record. actions[:enter ] = [:mark ]
220
253
record. actions[:exit ] = [:record ]
221
254
222
255
blank = re " [ \t ]*"
@@ -238,46 +271,57 @@ const record_machine, body_machine = (function ()
238
271
end )()
239
272
240
273
# Action table shared by the generated parsers: each key is an action name and each
# value is the quoted code for that action. `@mark`, `@relpos` and `@markpos` stay
# unexpanded inside these quoted expressions.
const record_actions = Dict(
    # Position bookkeeping.
    :mark => :(@mark),
    :pos => :(pos = @relpos(p)),

    # Record kind.
    :feature => :(record.kind = :feature),
    :directive => :(record.kind = :directive),
    :comment => :(record.kind = :comment),

    # Feature columns stored as ranges running from `pos` up to the byte before `p`.
    :feature_seqid => :(record.seqid = pos:@relpos(p-1)),
    :feature_source => :(record.source = pos:@relpos(p-1)),
    :feature_type_ => :(record.type_ = pos:@relpos(p-1)),
    :feature_start => :(record.start = pos:@relpos(p-1)),
    :feature_end_ => :(record.end_ = pos:@relpos(p-1)),
    :feature_score => :(record.score = pos:@relpos(p-1)),
    :feature_attribute_key => :(push!(record.attribute_keys, pos:@relpos(p-1))),

    # Single-position columns.
    :feature_strand => :(record.strand = @relpos(p)),
    :feature_phase => :(record.phase = @relpos(p)),

    # Copy the bytes from the mark up to `p` into the record and note the filled range.
    :record => quote
        appendfrom!(record.data, 1, data, @markpos, p-@markpos)
        record.filled = 1:(p-@markpos)
    end
)
260
293
261
# Code-generation settings passed to the generated parsers.
context = Automa.CodeGenContext(
    generator = :goto,
    checkbounds = false,
    loopunroll = 0
)

# Generate `index!(stream, record::Record)` from `record_machine` and evaluate the
# definition into this module.
Automa.Stream.generate_reader(
    :index!,
    record_machine,
    arguments = (:(record::Record),),
    actions = record_actions,
    context = context,
    returncode = quote
        # NOTE(review): `cs == 0` is taken to be the machine's success state - confirm
        # against the Automa version in use.
        if cs == 0
            return record
        end
        # Report the record's type via `typeof`: `eltype` on a `Record` instance falls
        # back to `Any` unless `Record` defines its own `eltype`, which made the
        # message uninformative.
        snippet = repr(String(data[p:min(p+7,p_end)]))
        throw(ArgumentError(string("failed to index ", typeof(record), " ~>", snippet)))
    end
) |> eval
269
313
270
- BioCore. ReaderHelper. generate_read_function (
271
- Reader,
314
+
315
+ Automa. Stream. generate_reader (
316
+ :readrecord! ,
272
317
body_machine,
273
- quote
274
- mark = offset = 0
275
- end ,
276
- merge (record_actions,
318
+ arguments = (:(reader:: Reader ), :(record:: Record )),
319
+ actions = merge (record_actions,
277
320
Dict (
321
+ :countline => :(linenum += 1 ),
278
322
:record => quote
279
- BioCore . ReaderHelper . resize_and_copy ! (record. data, data, BioCore . ReaderHelper . upanchor! (stream) : p - 1 )
280
- record. filled = (offset + 1 : p - 1 ) .- offset
323
+ appendfrom ! (record. data, 1 , data, @markpos , p - @markpos )
324
+ record. filled = 1 : (p - @markpos )
281
325
if isfeature (record)
282
326
reader. directive_count = reader. preceding_directive_count
283
327
reader. preceding_directive_count = 0
@@ -288,9 +332,10 @@ BioCore.ReaderHelper.generate_read_function(
288
332
end
289
333
if is_fasta_directive (record)
290
334
reader. found_fasta = true
291
- reader. state. finished = true
335
+ reader. state. filled = true
292
336
end
293
337
end
338
+
294
339
if record. kind ∈ reader. targets
295
340
found_record = true
296
341
@escape
@@ -299,14 +344,46 @@ BioCore.ReaderHelper.generate_read_function(
299
344
:body => quote
300
345
if data[p] == UInt8 (' >' )
301
346
reader. found_fasta = true
302
- reader. state. finished = true
347
+ reader. state. filled = true
303
348
# HACK: any better way?
304
349
cs = 0
305
350
@goto exit
306
351
end
307
352
end ,
308
- :countline => :(linenum += 1 ),
309
- :anchor => :(BioCore. ReaderHelper. anchor! (stream, p); offset = p - 1 )
310
353
)
311
- )
354
+ ),
355
+ context = context,
356
+ initcode = quote
357
+ cs = reader. state. state
358
+ linenum = reader. state. linenum
359
+ found_record = false
360
+ end ,
361
+ loopcode = quote
362
+ if found_record
363
+ @goto __return__
364
+ end
365
+ end ,
366
+ returncode = quote
367
+
368
+ reader. state. state = cs
369
+ # reader.state.filled |= cs == 0 # Note: if set to true, remains true.
370
+ reader. state. linenum = linenum
371
+
372
+ if found_record
373
+ return record
374
+ end
375
+
376
+ if cs == 0 || eof (stream)
377
+ throw (EOFError ())
378
+ end
379
+
380
+ if cs < 0
381
+ error (eltype (Reader), " file format error on line " , linenum, " ~>" , repr (String (data[p: min (p+ 7 ,p_end)])))
382
+ end
383
+
384
+ if p > p_eof ≥ 0
385
+ error (" incomplete $(typeof (reader)) input on line " , linenum)
386
+ end
387
+
388
+ end
312
389
) |> eval
0 commit comments