diff --git a/lib/tests.c b/lib/tests.c
index c8d1731..3816951 100644
--- a/lib/tests.c
+++ b/lib/tests.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 #include
 
 FILE *_devnull;
@@ -25,9 +26,48 @@ test_int_field_1d(void)
 
     for (j = 0; j < num_rows; j++) {
         ret = vcz_field_write(&field, j, buf, 1000, 0);
-        /* printf("%s: %s\n", buf, expected[j]); */
+        /* printf("ret = %d\n", (int)ret); */
+        /* printf("'%.*s': %s\n", (int) ret, buf, expected[j]); */
         CU_ASSERT_EQUAL_FATAL(ret, strlen(expected[j]));
-        CU_ASSERT_STRING_EQUAL(buf, expected[j]);
+        CU_ASSERT_NSTRING_EQUAL(buf, expected[j], ret);
+    }
+}
+
+static void
+test_int_field_1d_overflow(void)
+{
+    const int32_t data[] = { 1, 2, 12345789, -100, INT32_MIN, INT32_MAX, -1 };
+    const size_t num_rows = sizeof(data) / sizeof(*data);
+    vcz_field_t field = { .name = "test",
+        .type = VCZ_TYPE_INT,
+        .item_size = 4,
+        .num_columns = 1,
+        .data = (const char *) data };
+    int64_t ret;
+    size_t j, buflen;
+    char *buf;
+
+    /* malloc(0) may return NULL, so a buffer is only required when buflen > 0. */
+    for (j = 0; j < num_rows - 1; j++) {
+        /* printf("%d\n", (int) data[j]); */
+        for (buflen = 0; buflen <= VCZ_INT32_BUF_SIZE; buflen++) {
+            /* printf("buflen = %d\n", (int) buflen); */
+            buf = malloc(buflen);
+            CU_ASSERT_FATAL(buf != NULL || buflen == 0);
+            ret = vcz_field_write(&field, j, buf, buflen, 0);
+            free(buf);
+            CU_ASSERT_FATAL(ret == VCZ_ERR_BUFFER_OVERFLOW);
+        }
+    }
+    j = num_rows - 1;
+    CU_ASSERT_FATAL(data[j] == -1);
+    /* Missing data is treated differently. Just need 2 bytes for ".\t" */
+    for (buflen = 0; buflen < 2; buflen++) {
+        buf = malloc(buflen);
+        CU_ASSERT_FATAL(buf != NULL || buflen == 0);
+        ret = vcz_field_write(&field, j, buf, buflen, 0);
+        free(buf);
+        CU_ASSERT_FATAL(ret == VCZ_ERR_BUFFER_OVERFLOW);
     }
 }
 
@@ -50,7 +90,76 @@ test_int_field_2d(void)
         ret = vcz_field_write(&field, j, buf, 1000, 0);
         CU_ASSERT_EQUAL_FATAL(ret, strlen(expected[j]));
         /* printf("%s: %s\n", buf, expected[j]); */
-        CU_ASSERT_STRING_EQUAL(buf, expected[j]);
+        CU_ASSERT_NSTRING_EQUAL_FATAL(buf, expected[j], ret);
+    }
+}
+
+static void
+test_float_field_1d(void)
+{
+    float data[] = { 1.0f, 2.1f, INT32_MIN, 12345789.0f, -1, -100.123f, 0 };
+
+    const size_t num_rows = sizeof(data) / sizeof(*data);
+    vcz_field_t field = { .name = "test",
+        .type = VCZ_TYPE_FLOAT,
+        .item_size = 4,
+        .num_columns = 1,
+        .data = (const char *) data };
+    char buf[1000];
+    const char *expected[]
+        = { "1\t", "2.1\t", "-2147483648\t", "12345789\t", "-1\t", "-100.123\t", ".\t" };
+    int64_t ret;
+    size_t j;
+    int32_t *int_data = (int32_t *) data;
+
+    int_data[num_rows - 1] = VCZ_FLOAT32_MISSING_AS_INT32;
+
+    for (j = 0; j < num_rows; j++) {
+        ret = vcz_field_write(&field, j, buf, 1000, 0);
+        /* printf("ret = %d\n", (int)ret); */
+        /* printf("'%.*s':'%s'\n", (int) ret, buf, expected[j]); */
+        CU_ASSERT_EQUAL_FATAL(ret, strlen(expected[j]));
+        CU_ASSERT_NSTRING_EQUAL(buf, expected[j], ret);
+    }
+}
+
+static void
+test_float_field_1d_overflow(void)
+{
+    float data[] = { 1.0f, 2.1f, 12345789.0f, 3.14159265f, -1, -100.123f, 0 };
+    const size_t num_rows = sizeof(data) / sizeof(*data);
+    vcz_field_t field = { .name = "test",
+        .type = VCZ_TYPE_FLOAT,
+        .item_size = 4,
+        .num_columns = 1,
+        .data = (const char *) data };
+    int64_t ret;
+    size_t j, buflen;
+    char *buf;
+    int32_t *int_data = (int32_t *) data;
+
+    int_data[num_rows - 1] = VCZ_FLOAT32_MISSING_AS_INT32;
+
+    /* malloc(0) may return NULL, so a buffer is only required when buflen > 0. */
+    for (j = 0; j < num_rows - 1; j++) {
+        /* printf("%d\n", (int) data[j]); */
+        for (buflen = 0; buflen <= VCZ_FLOAT32_BUF_SIZE; buflen++) {
+            /* printf("buflen = %d\n", (int) buflen); */
+            buf = malloc(buflen);
+            CU_ASSERT_FATAL(buf != NULL || buflen == 0);
+            ret = vcz_field_write(&field, j, buf, buflen, 0);
+            free(buf);
+            CU_ASSERT_FATAL(ret == VCZ_ERR_BUFFER_OVERFLOW);
+        }
+    }
+    j = num_rows - 1;
+    /* Missing data is treated differently. Just need 2 bytes for ".\t" */
+    for (buflen = 0; buflen < 2; buflen++) {
+        buf = malloc(buflen);
+        CU_ASSERT_FATAL(buf != NULL || buflen == 0);
+        ret = vcz_field_write(&field, j, buf, buflen, 0);
+        free(buf);
+        CU_ASSERT_FATAL(ret == VCZ_ERR_BUFFER_OVERFLOW);
     }
 }
 
@@ -337,7 +446,14 @@ test_ftoa(void)
         {2311380, "2311380"},
         {16777216, "16777216"}, /* Maximum integer value of float */
         {-16777216, "-16777216"},
+        {INT32_MIN, "-2147483648"},
+        {(float) INT32_MAX, "2147483648"},
+        {(float) DBL_MAX, "inf",},
+        {(float) DBL_MIN, "0",},
+        {FLT_MIN, "0",},
         /* TODO test extreme value here, that push against the limits of f32 */
+        // FAILS https://github.com/jeromekelleher/vcztools/issues/21
+        /* {FLT_MAX, "340282346638528859811704183484516925440",}, */
     };
     // clang-format on
     int len;
@@ -346,7 +462,7 @@ test_ftoa(void)
 
     for (j = 0; j < sizeof(cases) / sizeof(*cases); j++) {
         len = vcz_ftoa(buf, cases[j].val);
-        /* printf("j = %d %f->%s=='%s'\n", j, cases[j].val, cases[j].expected, buf); */
+        /* printf("j = %d %f->%s=='%s'\n", (int) j, cases[j].val, cases[j].expected, buf); */
         CU_ASSERT_EQUAL_FATAL(len, strlen(cases[j].expected));
         CU_ASSERT_STRING_EQUAL_FATAL(buf, cases[j].expected);
     }
@@ -441,7 +557,10 @@ main(int argc, char **argv)
         { "test_string_field_1d", test_string_field_1d },
         { "test_string_field_2d", test_string_field_2d },
         { "test_int_field_1d", test_int_field_1d },
+        { "test_int_field_1d_overflow", test_int_field_1d_overflow },
         { "test_int_field_2d", test_int_field_2d },
+        { "test_float_field_1d", test_float_field_1d },
+        { "test_float_field_1d_overflow", test_float_field_1d_overflow },
         { "test_variant_encoder_minimal", test_variant_encoder_minimal },
         { "test_variant_fields_all_missing", test_variant_encoder_fields_all_missing },
         { "test_itoa_small", test_itoa_small },
diff --git a/lib/vcf_encoder.c b/lib/vcf_encoder.c
index c1e95bb..9a6b28a 100644
--- a/lib/vcf_encoder.c
+++ b/lib/vcf_encoder.c
@@ -8,9 +8,8 @@
 #include
 
 int
-vcz_itoa(char *buf, int32_t v)
+vcz_itoa(char *restrict buf, int64_t value)
 {
-    int64_t value = v;
     int p = 0;
     int j, k;
 
@@ -19,6 +18,9 @@ vcz_itoa(char *buf, int32_t v)
         p++;
         value = -value;
     }
+    /* We only support int32_t values. The +1 here is for supporting the
+     * float converter below */
+    assert(value <= (1LL + INT32_MAX));
     /* special case small values */
     if (value < 10) {
         buf[p] = (char) value + '0';
@@ -42,9 +44,8 @@ vcz_itoa(char *buf, int32_t v)
     } else if (value < 1000000000) {
         k = 8;
     } else if (value < 10000000000) {
+        // Largest possible INT32 value
         k = 9;
-    } else {
-        assert(false);
     }
 
     // iterate backwards in buf
@@ -62,7 +63,7 @@ vcz_itoa(char *buf, int32_t v)
 }
 
 int
-vcz_ftoa(char *buf, float value)
+vcz_ftoa(char *restrict buf, float value)
 {
     int p = 0;
     int64_t i, d1, d2, d3;
@@ -83,7 +84,9 @@ vcz_ftoa(char *buf, float value)
 
     /* integer part */
     i = (int64_t) round(((double) value) * 1000);
-    p += vcz_itoa(buf + p, (int32_t)(i / 1000));
+    /* printf("i = %ld\n", i); */
+    /* printf("i/ 1000 = %ld\n", i / 1000); */
+    p += vcz_itoa(buf + p, i / 1000);
 
     /* fractional part */
     d3 = i % 10;
@@ -107,6 +110,49 @@ vcz_ftoa(char *buf, float value)
     return p;
 }
 
+/* Store c at dest[offset] and return the next offset, or
+ * VCZ_ERR_BUFFER_OVERFLOW if offset is at or past the end of the buffer. */
+static inline int64_t
+append_char(char *restrict dest, char c, int64_t offset, int64_t buflen)
+{
+    if (offset >= buflen) {
+        return VCZ_ERR_BUFFER_OVERFLOW;
+    }
+    dest[offset] = c;
+    return offset + 1;
+}
+
+/* Append value as decimal text ('.' if it is VCZ_INT_MISSING) and return the
+ * new offset. Non-missing values fail with VCZ_ERR_BUFFER_OVERFLOW unless more
+ * than VCZ_INT32_BUF_SIZE bytes remain, whatever the actual digit count. */
+static inline int64_t
+append_int(char *restrict dest, int32_t value, int64_t offset, int64_t buflen)
+{
+    if (value == VCZ_INT_MISSING) {
+        return append_char(dest, '.', offset, buflen);
+    }
+    if (offset + VCZ_INT32_BUF_SIZE >= buflen) {
+        return VCZ_ERR_BUFFER_OVERFLOW;
+    }
+    return offset + vcz_itoa(dest + offset, value);
+}
+
+/* Append value via vcz_ftoa ('.' if int32_value is the missing bit pattern)
+ * and return the new offset. Non-missing values fail with
+ * VCZ_ERR_BUFFER_OVERFLOW unless more than VCZ_FLOAT32_BUF_SIZE bytes remain. */
+static inline int64_t
+append_float(char *restrict dest, int32_t int32_value, float value, int64_t offset,
+    int64_t buflen)
+{
+    if (int32_value == VCZ_FLOAT32_MISSING_AS_INT32) {
+        return append_char(dest, '.', offset, buflen);
+    }
+    if (offset + VCZ_FLOAT32_BUF_SIZE >= buflen) {
+        return VCZ_ERR_BUFFER_OVERFLOW;
+    }
+    return offset + vcz_ftoa(dest + offset, value);
+}
+
 static bool
 bool_all_missing(const int8_t *restrict data, size_t n)
 {
@@ -192,42 +238,39 @@ bool_field_write_entry(const vcz_field_t *VCZ_UNUSED(self), const void *VCZ_UNUS
 }
 
 static int64_t
-int32_field_write_entry(const vcz_field_t *self, const void *data, char *dest,
-    size_t VCZ_UNUSED(buflen), int64_t offset)
+int32_field_write_entry(const vcz_field_t *self, const void *restrict data, char *dest,
+    int64_t buflen, int64_t offset)
 {
-    const int32_t *source = (const int32_t *) data;
-    int32_t value;
+    const int32_t *restrict source = (const int32_t *) data;
     size_t column;
 
     for (column = 0; column < self->num_columns; column++) {
-        value = source[column];
-        if (value != VCZ_INT_FILL) {
-            if (column > 0) {
-                dest[offset] = ',';
-                offset++;
-            }
-            if (value == VCZ_INT_MISSING) {
-                dest[offset] = '.';
-                offset++;
-            } else {
-                offset += vcz_itoa(dest + offset, value);
+        if (source[column] == VCZ_INT_FILL) {
+            break;
+        }
+        if (column > 0) {
+            offset = append_char(dest, ',', offset, buflen);
+            if (offset < 0) {
+                goto out;
             }
         }
+        offset = append_int(dest, source[column], offset, buflen);
+        if (offset < 0) {
+            goto out;
+        }
     }
-    dest[offset] = '\t';
-    offset++;
-    dest[offset] = '\0';
+    offset = append_char(dest, '\t', offset, buflen);
+out:
     return offset;
 }
 
 static int64_t
-float32_field_write_entry(const vcz_field_t *self, const void *data, char *dest,
-    size_t VCZ_UNUSED(buflen), int64_t offset)
+float32_field_write_entry(const vcz_field_t *self, const void *restrict data,
+    char *restrict dest, int64_t buflen, int64_t offset)
 {
-    const float *source = (const float *) data;
-    const int32_t *int32_source = (const int32_t *) data;
+    const float *restrict source = (const float *restrict) data;
+    const int32_t *restrict int32_source = (const int32_t *restrict) data;
     int32_t int32_value;
-    float value;
     size_t column;
 
     for (column = 0; column < self->num_columns; column++) {
@@ -236,20 +279,18 @@ float32_field_write_entry(const vcz_field_t *self, const void *data, char *dest,
             break;
         }
         if (column > 0) {
-            dest[offset] = ',';
-            offset++;
+            offset = append_char(dest, ',', offset, buflen);
+            if (offset < 0) {
+                goto out;
+            }
         }
-        if (int32_value == VCZ_FLOAT32_MISSING_AS_INT32) {
-            dest[offset] = '.';
-            offset++;
-        } else {
-            value = source[column];
-            offset += vcz_ftoa(dest + offset, value);
+        offset = append_float(dest, int32_value, source[column], offset, buflen);
+        if (offset < 0) {
+            goto out;
         }
     }
-    dest[offset] = '\t';
-    offset++;
-    dest[offset] = '\0';
+    offset = append_char(dest, '\t', offset, buflen);
+out:
     return offset;
 }
 
@@ -259,11 +300,11 @@ vcz_field_write_entry(
 {
     if (self->type == VCZ_TYPE_INT) {
         if (self->item_size == 4) {
-            return int32_field_write_entry(self, data, dest, buflen, offset);
+            return int32_field_write_entry(self, data, dest, (int64_t) buflen, offset);
         }
     } else if (self->type == VCZ_TYPE_FLOAT) {
         assert(self->item_size == 4);
-        return float32_field_write_entry(self, data, dest, buflen, offset);
+        return float32_field_write_entry(self, data, dest, (int64_t) buflen, offset);
     } else if (self->type == VCZ_TYPE_BOOL) {
         assert(self->item_size == 1);
         assert(self->num_columns == 1);
diff --git a/lib/vcf_encoder.h b/lib/vcf_encoder.h
index 2550c7b..3c53490 100644
--- a/lib/vcf_encoder.h
+++ b/lib/vcf_encoder.h
@@ -31,8 +31,11 @@
 
 // arbitrary - we can increase if needs be
 #define VCZ_MAX_FIELD_NAME_LEN 256
+#define VCZ_INT32_BUF_SIZE 11 // -2147483648
+#define VCZ_FLOAT32_BUF_SIZE 15 // An int + "." and 3 decimal places
 
 #define VCZ_ERR_NO_MEMORY (-100)
+#define VCZ_ERR_BUFFER_OVERFLOW (-101)
 
 /* Built-in-limitations */
 #define VCZ_ERR_FIELD_NAME_TOO_LONG (-201)
@@ -93,5 +96,5 @@ int vcz_variant_encoder_add_info_field(vcz_variant_encoder_t *self, const char *
 int64_t vcz_variant_encoder_write_row(
     const vcz_variant_encoder_t *self, size_t row, char *buf, size_t buflen);
 
-int vcz_itoa(char *buf, int32_t v);
+int vcz_itoa(char *buf, int64_t v);
 int vcz_ftoa(char *buf, float v);