Skip to content

Starting point for buffer safety #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 121 additions & 4 deletions lib/tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <stdint.h>
#include <float.h>
#include <vcf_encoder.h>

FILE *_devnull;
Expand All @@ -25,9 +26,47 @@ test_int_field_1d(void)

for (j = 0; j < num_rows; j++) {
ret = vcz_field_write(&field, j, buf, 1000, 0);
/* printf("%s: %s\n", buf, expected[j]); */
/* printf("ret = %d\n", (int)ret); */
/* printf("'%.*s': %s\n", (int) ret, buf, expected[j]); */
CU_ASSERT_EQUAL_FATAL(ret, strlen(expected[j]));
CU_ASSERT_STRING_EQUAL(buf, expected[j]);
CU_ASSERT_NSTRING_EQUAL(buf, expected[j], ret);
}
}

/*
 * Check that vcz_field_write reports VCZ_ERR_BUFFER_OVERFLOW, instead of
 * succeeding, when an int32 field is written into a buffer that is too small.
 *
 * Every non-missing value is expected to fail for all buffer lengths up to
 * and including VCZ_INT32_BUF_SIZE, regardless of how many digits the value
 * itself needs. The final value (-1) is the missing marker, which is
 * rendered as ".\t" and therefore only fails for buffers shorter than 2 bytes.
 *
 * Each buffer is heap-allocated at exactly buflen bytes; presumably this is
 * so that memory checkers can flag any write past the end.
 */
static void
test_int_field_1d_overflow(void)
{
    const int32_t data[] = { 1, 2, 12345789, -100, INT32_MIN, INT32_MAX, -1 };
    const size_t num_rows = sizeof(data) / sizeof(*data);
    vcz_field_t field = { .name = "test",
        .type = VCZ_TYPE_INT,
        .item_size = 4,
        .num_columns = 1,
        .data = (const char *) data };
    int64_t ret;
    size_t j, buflen;
    char *buf;

    for (j = 0; j < num_rows - 1; j++) {
        for (buflen = 0; buflen <= VCZ_INT32_BUF_SIZE; buflen++) {
            buf = malloc(buflen);
            /* malloc(0) is allowed to return NULL, so only a failed
             * non-empty allocation is an error. */
            CU_ASSERT_FATAL(buflen == 0 || buf != NULL);
            ret = vcz_field_write(&field, j, buf, buflen, 0);
            free(buf);
            CU_ASSERT_FATAL(ret == VCZ_ERR_BUFFER_OVERFLOW);
        }
    }
    j = num_rows - 1;
    CU_ASSERT_FATAL(data[j] == -1);
    /* Missing data is treated differently: it only needs 2 bytes for ".\t" */
    for (buflen = 0; buflen < 2; buflen++) {
        buf = malloc(buflen);
        CU_ASSERT_FATAL(buflen == 0 || buf != NULL);
        ret = vcz_field_write(&field, j, buf, buflen, 0);
        free(buf);
        CU_ASSERT_FATAL(ret == VCZ_ERR_BUFFER_OVERFLOW);
    }
}

Expand All @@ -50,7 +89,75 @@ test_int_field_2d(void)
ret = vcz_field_write(&field, j, buf, 1000, 0);
CU_ASSERT_EQUAL_FATAL(ret, strlen(expected[j]));
/* printf("%s: %s\n", buf, expected[j]); */
CU_ASSERT_STRING_EQUAL(buf, expected[j]);
CU_ASSERT_NSTRING_EQUAL_FATAL(buf, expected[j], ret);
}
}

/*
 * Check the text produced by vcz_field_write for a one-column float32 field.
 *
 * Each row must produce exactly the corresponding entry of expected[],
 * including the trailing tab. The last row is overwritten with the bit
 * pattern VCZ_FLOAT32_MISSING_AS_INT32 and must be rendered as ".".
 */
static void
test_float_field_1d(void)
{
    float data[] = { 1.0f, 2.1f, INT32_MIN, 12345789.0f, -1, -100.123f, 0 };
    const size_t num_rows = sizeof(data) / sizeof(*data);
    vcz_field_t field = { .name = "test",
        .type = VCZ_TYPE_FLOAT,
        .item_size = 4,
        .num_columns = 1,
        .data = (const char *) data };
    char buf[1000];
    const char *expected[]
        = { "1\t", "2.1\t", "-2147483648\t", "12345789\t", "-1\t", "-100.123\t", ".\t" };
    const int32_t missing = VCZ_FLOAT32_MISSING_AS_INT32;
    int64_t ret;
    size_t j;

    /* Install the missing-value bit pattern with memcpy: storing through an
     * int32_t pointer into a float object would break the effective-type
     * (strict aliasing) rules. */
    memcpy(&data[num_rows - 1], &missing, sizeof(missing));

    for (j = 0; j < num_rows; j++) {
        ret = vcz_field_write(&field, j, buf, sizeof(buf), 0);
        CU_ASSERT_EQUAL_FATAL(ret, strlen(expected[j]));
        /* Compare only the ret bytes that were reported as written. */
        CU_ASSERT_NSTRING_EQUAL(buf, expected[j], ret);
    }
}

/*
 * Check that vcz_field_write reports VCZ_ERR_BUFFER_OVERFLOW, instead of
 * succeeding, when a float32 field is written into a buffer that is too small.
 *
 * Every non-missing value is expected to fail for all buffer lengths up to
 * and including VCZ_FLOAT32_BUF_SIZE. The final row is overwritten with the
 * bit pattern VCZ_FLOAT32_MISSING_AS_INT32; it is rendered as ".\t" and
 * therefore only fails for buffers shorter than 2 bytes.
 *
 * Each buffer is heap-allocated at exactly buflen bytes; presumably this is
 * so that memory checkers can flag any write past the end.
 */
static void
test_float_field_1d_overflow(void)
{
    float data[] = { 1.0f, 2.1f, 12345789.0f, (float) M_PI, -1, -100.123f, 0 };
    const size_t num_rows = sizeof(data) / sizeof(*data);
    vcz_field_t field = { .name = "test",
        .type = VCZ_TYPE_FLOAT,
        .item_size = 4,
        .num_columns = 1,
        .data = (const char *) data };
    const int32_t missing = VCZ_FLOAT32_MISSING_AS_INT32;
    int64_t ret;
    size_t j, buflen;
    char *buf;

    /* Install the missing-value bit pattern with memcpy: storing through an
     * int32_t pointer into a float object would break the effective-type
     * (strict aliasing) rules. */
    memcpy(&data[num_rows - 1], &missing, sizeof(missing));

    for (j = 0; j < num_rows - 1; j++) {
        for (buflen = 0; buflen <= VCZ_FLOAT32_BUF_SIZE; buflen++) {
            buf = malloc(buflen);
            /* malloc(0) is allowed to return NULL, so only a failed
             * non-empty allocation is an error. */
            CU_ASSERT_FATAL(buflen == 0 || buf != NULL);
            ret = vcz_field_write(&field, j, buf, buflen, 0);
            free(buf);
            CU_ASSERT_FATAL(ret == VCZ_ERR_BUFFER_OVERFLOW);
        }
    }
    j = num_rows - 1;
    /* Missing data is treated differently: it only needs 2 bytes for ".\t" */
    for (buflen = 0; buflen < 2; buflen++) {
        buf = malloc(buflen);
        CU_ASSERT_FATAL(buflen == 0 || buf != NULL);
        ret = vcz_field_write(&field, j, buf, buflen, 0);
        free(buf);
        CU_ASSERT_FATAL(ret == VCZ_ERR_BUFFER_OVERFLOW);
    }
}

Expand Down Expand Up @@ -337,7 +444,14 @@ test_ftoa(void)
{2311380, "2311380"},
{16777216, "16777216"}, /* Maximum integer value of float */
{-16777216, "-16777216"},
{INT32_MIN, "-2147483648"},
{(float) INT32_MAX, "2147483648"},
{(float) DBL_MAX, "inf",},
{(float) DBL_MIN, "0",},
{FLT_MIN, "0",},
/* TODO test extreme values here that push against the limits of f32 */
// FAILS https://github.com/jeromekelleher/vcztools/issues/21
/* {FLT_MAX, "340282346638528859811704183484516925440",}, */
};
// clang-format on
int len;
Expand All @@ -346,7 +460,7 @@ test_ftoa(void)

for (j = 0; j < sizeof(cases) / sizeof(*cases); j++) {
len = vcz_ftoa(buf, cases[j].val);
/* printf("j = %d %f->%s=='%s'\n", j, cases[j].val, cases[j].expected, buf); */
/* printf("j = %d %f->%s=='%s'\n", (int) j, cases[j].val, cases[j].expected, buf); */
CU_ASSERT_EQUAL_FATAL(len, strlen(cases[j].expected));
CU_ASSERT_STRING_EQUAL_FATAL(buf, cases[j].expected);
}
Expand Down Expand Up @@ -441,7 +555,10 @@ main(int argc, char **argv)
{ "test_string_field_1d", test_string_field_1d },
{ "test_string_field_2d", test_string_field_2d },
{ "test_int_field_1d", test_int_field_1d },
{ "test_int_field_1d_overflow", test_int_field_1d_overflow },
{ "test_int_field_2d", test_int_field_2d },
{ "test_float_field_1d", test_float_field_1d },
{ "test_float_field_1d_overflow", test_float_field_1d_overflow },
{ "test_variant_encoder_minimal", test_variant_encoder_minimal },
{ "test_variant_fields_all_missing", test_variant_encoder_fields_all_missing },
{ "test_itoa_small", test_itoa_small },
Expand Down
117 changes: 75 additions & 42 deletions lib/vcf_encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@
#include <math.h>

int
vcz_itoa(char *buf, int32_t v)
vcz_itoa(char *restrict buf, int64_t value)
{
int64_t value = v;
int p = 0;
int j, k;

Expand All @@ -19,6 +18,9 @@ vcz_itoa(char *buf, int32_t v)
p++;
value = -value;
}
/* We only support int32_t values. The +1 here is for supporting the
* float converter below */
assert(value <= (1LL + INT32_MAX));
/* special case small values */
if (value < 10) {
buf[p] = (char) value + '0';
Expand All @@ -42,9 +44,8 @@ vcz_itoa(char *buf, int32_t v)
} else if (value < 1000000000) {
k = 8;
} else if (value < 10000000000) {
// Largest possible INT32 value
k = 9;
} else {
assert(false);
}

// iterate backwards in buf
Expand All @@ -62,7 +63,7 @@ vcz_itoa(char *buf, int32_t v)
}

int
vcz_ftoa(char *buf, float value)
vcz_ftoa(char *restrict buf, float value)
{
int p = 0;
int64_t i, d1, d2, d3;
Expand All @@ -83,7 +84,9 @@ vcz_ftoa(char *buf, float value)

/* integer part */
i = (int64_t) round(((double) value) * 1000);
p += vcz_itoa(buf + p, (int32_t)(i / 1000));
/* printf("i = %ld\n", i); */
/* printf("i/ 1000 = %ld\n", i / 1000); */
p += vcz_itoa(buf + p, i / 1000);

/* fractional part */
d3 = i % 10;
Expand All @@ -107,6 +110,41 @@ vcz_ftoa(char *buf, float value)
return p;
}

/*
 * Store the single character c at dest[offset], provided it fits in a buffer
 * of buflen bytes.
 *
 * Returns the offset just past the stored character, or
 * VCZ_ERR_BUFFER_OVERFLOW (without touching dest) when there is no room.
 * offset is expected to be non-negative.
 */
static inline int64_t
append_char(char *restrict dest, char c, int64_t offset, int64_t buflen)
{
    /* Use >= rather than ==: an offset already beyond the end of the buffer
     * must be rejected too, not written through. */
    if (offset >= buflen) {
        return VCZ_ERR_BUFFER_OVERFLOW;
    }
    dest[offset] = c;
    return offset + 1;
}

/*
 * Append the text form of an int32 value to dest at the given offset.
 *
 * A value equal to VCZ_INT_MISSING is emitted as a single '.' via
 * append_char. Any other value is formatted by vcz_itoa, but only when more
 * than VCZ_INT32_BUF_SIZE bytes remain after offset; the check is made
 * against that fixed reserve, not against the actual length of the value.
 *
 * Returns the new offset, or VCZ_ERR_BUFFER_OVERFLOW if the output does not
 * fit.
 */
static inline int64_t
append_int(char *restrict dest, int32_t value, int64_t offset, int64_t buflen)
{
    int64_t next;

    if (value != VCZ_INT_MISSING) {
        if (buflen <= offset + VCZ_INT32_BUF_SIZE) {
            next = VCZ_ERR_BUFFER_OVERFLOW;
        } else {
            /* Advance by whatever vcz_itoa reports it produced. */
            next = offset + vcz_itoa(dest + offset, value);
        }
    } else {
        next = append_char(dest, '.', offset, buflen);
    }
    return next;
}

/*
 * Append the text form of a float32 value to dest at the given offset.
 *
 * int32_value is compared against VCZ_FLOAT32_MISSING_AS_INT32 to detect the
 * missing marker, which is emitted as a single '.' via append_char.
 * Otherwise value is formatted by vcz_ftoa, but only when more than
 * VCZ_FLOAT32_BUF_SIZE bytes remain after offset; the check is made against
 * that fixed reserve, not against the actual length of the value.
 *
 * Returns the new offset, or VCZ_ERR_BUFFER_OVERFLOW if the output does not
 * fit.
 */
static inline int64_t
append_float(char *restrict dest, int32_t int32_value, float value, int64_t offset,
    int64_t buflen)
{
    int64_t next;

    if (int32_value != VCZ_FLOAT32_MISSING_AS_INT32) {
        if (buflen <= offset + VCZ_FLOAT32_BUF_SIZE) {
            next = VCZ_ERR_BUFFER_OVERFLOW;
        } else {
            /* Advance by the length vcz_ftoa returns. */
            next = offset + vcz_ftoa(dest + offset, value);
        }
    } else {
        next = append_char(dest, '.', offset, buflen);
    }
    return next;
}

static bool
bool_all_missing(const int8_t *restrict data, size_t n)
{
Expand Down Expand Up @@ -192,42 +230,39 @@ bool_field_write_entry(const vcz_field_t *VCZ_UNUSED(self), const void *VCZ_UNUS
}

static int64_t
int32_field_write_entry(const vcz_field_t *self, const void *data, char *dest,
size_t VCZ_UNUSED(buflen), int64_t offset)
int32_field_write_entry(const vcz_field_t *self, const void *restrict data, char *dest,
int64_t buflen, int64_t offset)
{
const int32_t *source = (const int32_t *) data;
int32_t value;
const int32_t *restrict source = (const int32_t *) data;
size_t column;

for (column = 0; column < self->num_columns; column++) {
value = source[column];
if (value != VCZ_INT_FILL) {
if (column > 0) {
dest[offset] = ',';
offset++;
}
if (value == VCZ_INT_MISSING) {
dest[offset] = '.';
offset++;
} else {
offset += vcz_itoa(dest + offset, value);
if (source[column] == VCZ_INT_FILL) {
break;
}
if (column > 0) {
offset = append_char(dest, ',', offset, buflen);
if (offset < 0) {
goto out;
}
}
offset = append_int(dest, source[column], offset, buflen);
if (offset < 0) {
goto out;
}
}
dest[offset] = '\t';
offset++;
dest[offset] = '\0';
offset = append_char(dest, '\t', offset, buflen);
out:
return offset;
}

static int64_t
float32_field_write_entry(const vcz_field_t *self, const void *data, char *dest,
size_t VCZ_UNUSED(buflen), int64_t offset)
float32_field_write_entry(const vcz_field_t *self, const void *restrict data,
char *restrict dest, int64_t buflen, int64_t offset)
{
const float *source = (const float *) data;
const int32_t *int32_source = (const int32_t *) data;
const float *restrict source = (const float *restrict) data;
const int32_t *restrict int32_source = (const int32_t *restrict) data;
int32_t int32_value;
float value;
size_t column;

for (column = 0; column < self->num_columns; column++) {
Expand All @@ -236,20 +271,18 @@ float32_field_write_entry(const vcz_field_t *self, const void *data, char *dest,
break;
}
if (column > 0) {
dest[offset] = ',';
offset++;
offset = append_char(dest, ',', offset, buflen);
if (offset < 0) {
goto out;
}
}
if (int32_value == VCZ_FLOAT32_MISSING_AS_INT32) {
dest[offset] = '.';
offset++;
} else {
value = source[column];
offset += vcz_ftoa(dest + offset, value);
offset = append_float(dest, int32_value, source[column], offset, buflen);
if (offset < 0) {
goto out;
}
}
dest[offset] = '\t';
offset++;
dest[offset] = '\0';
offset = append_char(dest, '\t', offset, buflen);
out:
return offset;
}

Expand All @@ -259,11 +292,11 @@ vcz_field_write_entry(
{
if (self->type == VCZ_TYPE_INT) {
if (self->item_size == 4) {
return int32_field_write_entry(self, data, dest, buflen, offset);
return int32_field_write_entry(self, data, dest, (int64_t) buflen, offset);
}
} else if (self->type == VCZ_TYPE_FLOAT) {
assert(self->item_size == 4);
return float32_field_write_entry(self, data, dest, buflen, offset);
return float32_field_write_entry(self, data, dest, (int64_t) buflen, offset);
} else if (self->type == VCZ_TYPE_BOOL) {
assert(self->item_size == 1);
assert(self->num_columns == 1);
Expand Down
5 changes: 4 additions & 1 deletion lib/vcf_encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@

// arbitrary - we can increase if needs be
#define VCZ_MAX_FIELD_NAME_LEN 256
#define VCZ_INT32_BUF_SIZE 11 // -2147483648
#define VCZ_FLOAT32_BUF_SIZE 15 // An int + "." and 3 decimal places

#define VCZ_ERR_NO_MEMORY (-100)
#define VCZ_ERR_BUFFER_OVERFLOW (-101)

/* Built-in-limitations */
#define VCZ_ERR_FIELD_NAME_TOO_LONG (-201)
Expand Down Expand Up @@ -93,5 +96,5 @@ int vcz_variant_encoder_add_info_field(vcz_variant_encoder_t *self, const char *
int64_t vcz_variant_encoder_write_row(
const vcz_variant_encoder_t *self, size_t row, char *buf, size_t buflen);

int vcz_itoa(char *buf, int32_t v);
int vcz_itoa(char *buf, int64_t v);
int vcz_ftoa(char *buf, float v);