@@ -55,6 +55,32 @@ vcz_itoa(int32_t value, char *buf)
55
55
return p ;
56
56
}
57
57
58
+ static bool
59
+ int32_all_missing (const int32_t * restrict data , size_t n )
60
+ {
61
+ size_t j ;
62
+
63
+ for (j = 0 ; j < n ; j ++ ) {
64
+ if (data [j ] != VCZ_INT_FILL && data [j ] != VCZ_INT_MISSING ) {
65
+ return false;
66
+ }
67
+ }
68
+ return true;
69
+ }
70
+
71
+ static bool
72
+ string_all_missing (const char * restrict data , size_t item_size , size_t n )
73
+ {
74
+ size_t j ;
75
+
76
+ for (j = 0 ; j < n * item_size ; j ++ ) {
77
+ if (data [j ] != VCZ_STRING_FILL && data [j ] != VCZ_STRING_MISSING ) {
78
+ return false;
79
+ }
80
+ }
81
+ return true;
82
+ }
83
+
58
84
static int64_t
59
85
string_field_write_entry (
60
86
const vcz_field_t * self , const void * data , char * dest , size_t buflen , int64_t offset )
@@ -144,36 +170,46 @@ vcz_field_write(
144
170
return vcz_field_write_entry (self , data , dest , buflen , offset );
145
171
}
146
172
147
- void
148
- vcz_field_print_state (const vcz_field_t * self , FILE * out )
173
+ static bool
174
+ vcz_info_field_is_missing (const vcz_field_t * self , size_t variant )
149
175
{
150
- fprintf (out , "\t%s\ttype:%d\titem_size=%d\tnum_columns=%d\tdata=%p\n" , self -> name ,
151
- self -> type , (int ) self -> item_size , (int ) self -> num_columns , self -> data );
176
+
177
+ size_t row_size = self -> num_columns * self -> item_size ;
178
+ const void * data = self -> data + variant * row_size ;
179
+
180
+ if (self -> type == VCZ_TYPE_INT ) {
181
+ if (self -> item_size == 4 ) {
182
+ return int32_all_missing (data , self -> num_columns );
183
+ }
184
+ } else if (self -> type == VCZ_TYPE_STRING ) {
185
+ return string_all_missing (data , self -> item_size , self -> num_columns );
186
+ }
187
+ assert (false);
188
+ return false;
152
189
}
153
190
154
- int64_t
155
- vcz_variant_encoder_write_format_specifiers (
156
- const vcz_variant_encoder_t * self , char * dest , size_t buflen , int64_t offset )
191
+ static bool
192
+ vcz_format_field_is_missing (const vcz_field_t * self , size_t variant , size_t num_samples )
157
193
{
158
- const int format_len = 7 ;
159
- size_t j ;
194
+ size_t row_size = self -> num_columns * self -> item_size * num_samples ;
195
+ const void * data = self -> data + variant * row_size ;
160
196
161
- strcpy (dest + offset , "FORMAT=" );
162
- offset += format_len ;
163
- if (self -> gt .data != NULL ) {
164
- strcpy (dest + offset , "GT" );
165
- offset += 2 ;
166
- }
167
- for (j = 0 ; j < self -> num_format_fields ; j ++ ) {
168
- dest [offset ] = ':' ;
169
- offset ++ ;
170
- strcpy (dest + offset , self -> format_fields [j ].name );
171
- offset += strlen (self -> format_fields [j ].name );
197
+ if (self -> type == VCZ_TYPE_INT ) {
198
+ if (self -> item_size == 4 ) {
199
+ return int32_all_missing (data , self -> num_columns * num_samples );
200
+ }
201
+ } else if (self -> type == VCZ_TYPE_STRING ) {
202
+ return string_all_missing (data , self -> item_size , self -> num_columns * num_samples );
172
203
}
173
- dest [offset ] = '\t' ;
174
- offset ++ ;
175
- dest [offset ] = '\0' ;
176
- return offset ;
204
+ assert (false);
205
+ return false;
206
+ }
207
+
208
+ void
209
+ vcz_field_print_state (const vcz_field_t * self , FILE * out )
210
+ {
211
+ fprintf (out , "\t%s\ttype:%d\titem_size=%d\tnum_columns=%d\tdata=%p\n" , self -> name ,
212
+ self -> type , (int ) self -> item_size , (int ) self -> num_columns , self -> data );
177
213
}
178
214
179
215
int64_t
@@ -219,98 +255,194 @@ vcz_variant_encoder_write_sample_gt(const vcz_variant_encoder_t *self, size_t va
219
255
return offset ;
220
256
}
221
257
258
+ /* int64_t */
259
+ /* vcz_variant_encoder_write_format_fields(const vcz_variant_encoder_t *self, */
260
+ /* size_t variant, size_t sample, char *dest, size_t buflen, int64_t offset) */
261
+ /* { */
262
+ /* vcz_field_t field; */
263
+ /* size_t j, row_size; */
264
+ /* const void *data; */
265
+
266
+ /* if (self->gt.data != NULL) { */
267
+ /* offset = vcz_variant_encoder_write_sample_gt( */
268
+ /* self, variant, sample, dest, buflen, offset); */
269
+ /* if (offset < 0) { */
270
+ /* goto out; */
271
+ /* } */
272
+ /* } */
273
+
274
+ /* for (j = 0; j < self->num_format_fields; j++) { */
275
+ /* field = self->format_fields[j]; */
276
+ /* dest[offset - 1] = ':'; */
277
+ /* row_size = self->num_samples * field.num_columns * field.item_size; */
278
+ /* data = field.data + variant * row_size */
279
+ /* + sample * field.num_columns * field.item_size; */
280
+ /* offset = vcz_field_write_entry(&field, data, dest, buflen, offset); */
281
+ /* if (offset < 0) { */
282
+ /* goto out; */
283
+ /* } */
284
+ /* } */
285
+ /* out: */
286
+ /* return offset; */
287
+ /* } */
288
+
222
289
int64_t
223
- vcz_variant_encoder_write_format_fields (const vcz_variant_encoder_t * self ,
224
- size_t variant , size_t sample , char * dest , size_t buflen , int64_t offset )
290
+ vcz_variant_encoder_write_info_fields (const vcz_variant_encoder_t * self , size_t variant ,
291
+ char * dest , size_t buflen , int64_t offset )
225
292
{
226
293
vcz_field_t field ;
227
- size_t j , row_size ;
228
- const void * data ;
229
-
230
- if (self -> gt .data != NULL ) {
231
- offset = vcz_variant_encoder_write_sample_gt (
232
- self , variant , sample , dest , buflen , offset );
233
- if (offset < 0 ) {
294
+ size_t j ;
295
+ bool * missing = NULL ;
296
+ bool all_missing = true;
297
+ bool first_field ;
298
+
299
+ if (self -> num_info_fields > 0 ) {
300
+ missing = malloc (self -> num_info_fields * sizeof (* missing ));
301
+ if (missing == NULL ) {
302
+ offset = VCZ_ERR_NO_MEMORY ;
234
303
goto out ;
235
304
}
305
+ for (j = 0 ; j < self -> num_info_fields ; j ++ ) {
306
+ missing [j ] = vcz_info_field_is_missing (& self -> info_fields [j ], variant );
307
+ if (!missing [j ]) {
308
+ all_missing = false;
309
+ }
310
+ }
236
311
}
237
312
238
- for (j = 0 ; j < self -> num_format_fields ; j ++ ) {
239
- field = self -> format_fields [j ];
240
- dest [offset - 1 ] = ':' ;
241
- row_size = self -> num_samples * field .num_columns * field .item_size ;
242
- data = field .data + variant * row_size
243
- + sample * field .num_columns * field .item_size ;
244
- offset = vcz_field_write_entry (& field , data , dest , buflen , offset );
245
- if (offset < 0 ) {
246
- goto out ;
313
+ if (all_missing ) {
314
+ dest [offset ] = '.' ;
315
+ offset ++ ;
316
+ dest [offset ] = '\t' ;
317
+ offset ++ ;
318
+ } else {
319
+ first_field = true;
320
+ for (j = 0 ; j < self -> num_info_fields ; j ++ ) {
321
+ if (!missing [j ]) {
322
+ if (!first_field ) {
323
+ dest [offset - 1 ] = ';' ;
324
+ }
325
+ first_field = false;
326
+ field = self -> info_fields [j ];
327
+ memcpy (dest + offset , field .name , field .name_length );
328
+ offset += field .name_length ;
329
+ dest [offset ] = '=' ;
330
+ offset ++ ;
331
+ offset = vcz_field_write (& field , variant , dest , buflen , offset );
332
+ if (offset < 0 ) {
333
+ goto out ;
334
+ }
335
+ }
247
336
}
248
337
}
249
338
out :
339
+ if (missing != NULL ) {
340
+ free (missing );
341
+ }
250
342
return offset ;
251
343
}
252
344
253
- int64_t
254
- vcz_variant_encoder_write_info_fields (const vcz_variant_encoder_t * self , size_t variant ,
255
- char * dest , size_t buflen , int64_t offset )
345
+
346
+ static int64_t
347
+ vcz_variant_encoder_write_format_fields (
348
+ const vcz_variant_encoder_t * self , size_t variant , char * buf , size_t buflen , int64_t offset )
256
349
{
350
+ size_t j , sample , row_size ;
257
351
vcz_field_t field ;
258
- size_t j ;
352
+ bool * missing = NULL ;
353
+ bool all_missing = true;
354
+ bool has_gt = (self -> gt .data != NULL );
355
+ bool gt_missing = true;
356
+ const size_t num_samples = self -> num_samples ;
357
+ const void * data ;
259
358
260
- if (self -> num_info_fields == 0 ) {
261
- dest [offset ] = '.' ;
262
- offset ++ ;
263
- dest [offset ] = '\t' ;
264
- offset ++ ;
359
+ if (has_gt ) {
360
+ gt_missing = vcz_format_field_is_missing (& self -> gt , variant , num_samples );
265
361
}
266
- for (j = 0 ; j < self -> num_info_fields ; j ++ ) {
267
- if (j > 0 ) {
268
- dest [offset - 1 ] = ';' ;
269
- }
270
- field = self -> info_fields [j ];
271
- memcpy (dest + offset , field .name , field .name_length );
272
- offset += field .name_length ;
273
- dest [offset ] = '=' ;
274
- offset ++ ;
275
- offset = vcz_field_write (& field , variant , dest , buflen , offset );
276
- if (offset < 0 ) {
362
+
363
+ if (self -> num_format_fields > 0 ) {
364
+ missing = malloc (self -> num_format_fields * sizeof (* missing ));
365
+ if (missing == NULL ) {
366
+ offset = VCZ_ERR_NO_MEMORY ;
277
367
goto out ;
278
368
}
369
+ for (j = 0 ; j < self -> num_format_fields ; j ++ ) {
370
+ missing [j ] = vcz_format_field_is_missing (& self -> format_fields [j ], variant , num_samples );
371
+ if (!missing [j ]) {
372
+ all_missing = false;
373
+ }
374
+ }
375
+ }
376
+ all_missing = all_missing && gt_missing ;
377
+
378
+ if (! all_missing ) {
379
+
380
+ if (!gt_missing ) {
381
+ strcpy (buf + offset , "GT:" );
382
+ offset += 3 ;
383
+ }
384
+ for (j = 0 ; j < self -> num_format_fields ; j ++ ) {
385
+ if (!missing [j ]) {
386
+ strcpy (buf + offset , self -> format_fields [j ].name );
387
+ offset += self -> format_fields [j ].name_length ;
388
+ buf [offset ] = ':' ;
389
+ offset ++ ;
390
+ }
391
+ }
392
+ buf [offset - 1 ] = '\t' ;
393
+
394
+ for (sample = 0 ; sample < num_samples ; sample ++ ) {
395
+ if (!gt_missing ) {
396
+ offset = vcz_variant_encoder_write_sample_gt (
397
+ self , variant , sample , buf , buflen , offset );
398
+ if (offset < 0 ) {
399
+ goto out ;
400
+ }
401
+ buf [offset - 1 ] = ':' ;
402
+ }
403
+ for (j = 0 ; j < self -> num_format_fields ; j ++ ) {
404
+ if (!missing [j ]) {
405
+ field = self -> format_fields [j ];
406
+ row_size = num_samples * field .num_columns * field .item_size ;
407
+ data = field .data + variant * row_size
408
+ + sample * field .num_columns * field .item_size ;
409
+ offset = vcz_field_write_entry (& field , data , buf , buflen , offset );
410
+ if (offset < 0 ) {
411
+ goto out ;
412
+ }
413
+ buf [offset - 1 ] = ':' ;
414
+ }
415
+ }
416
+ buf [offset - 1 ] = '\t' ;
417
+ }
279
418
}
280
419
out :
420
+ if (missing != NULL ) {
421
+ free (missing );
422
+ }
281
423
return offset ;
282
424
}
283
425
284
426
int64_t
285
427
vcz_variant_encoder_write_row (
286
- const vcz_variant_encoder_t * self , size_t row , char * buf , size_t buflen )
428
+ const vcz_variant_encoder_t * self , size_t variant , char * buf , size_t buflen )
287
429
{
288
430
int64_t offset = 0 ;
289
431
size_t j ;
290
432
291
433
for (j = 0 ; j < VCZ_NUM_FIXED_FIELDS ; j ++ ) {
292
- offset = vcz_field_write (& self -> fixed_fields [j ], row , buf , buflen , offset );
434
+ offset = vcz_field_write (& self -> fixed_fields [j ], variant , buf , buflen , offset );
293
435
if (offset < 0 ) {
294
436
goto out ;
295
437
}
296
438
}
297
- offset = vcz_variant_encoder_write_info_fields (self , row , buf , buflen , offset );
439
+ offset = vcz_variant_encoder_write_info_fields (self , variant , buf , buflen , offset );
298
440
if (offset < 0 ) {
299
441
goto out ;
300
442
}
301
- if (self -> num_samples > 0 ) {
302
- offset = vcz_variant_encoder_write_format_specifiers (self , buf , buflen , offset );
303
- if (offset < 0 ) {
304
- goto out ;
305
- }
306
- for (j = 0 ; j < self -> num_samples ; j ++ ) {
307
- /* printf("Run sample %d\n", (int) j); */
308
- offset = vcz_variant_encoder_write_format_fields (
309
- self , row , j , buf , buflen , offset );
310
- if (offset < 0 ) {
311
- goto out ;
312
- }
313
- }
443
+ offset = vcz_variant_encoder_write_format_fields (self , variant , buf , buflen , offset );
444
+ if (offset < 0 ) {
445
+ goto out ;
314
446
}
315
447
offset -- ;
316
448
buf [offset ] = '\0' ;
0 commit comments