Skip to content

Commit 2e5b809

Browse files
committed
Make revised search algorithm thread safe
In commit 85b1c53, I introduced a new "max_len" variable, used by the search() function to store the maximum match length found in course of the binary search. However, I overlooked the fact that the search() function then lost thread safety, as "max_len" would be shared by all threads of a process using libbsdiff. Fix the issue by not using a global variable, instead updating "max_len" via another pointer, just like the "pos" variable is handled already. Signed-off-by: Patrick McCarty <patrick.mccarty@intel.com>
1 parent 6127bd6 commit 2e5b809

File tree

1 file changed

+19
-21
lines changed

1 file changed

+19
-21
lines changed

src/diff.c

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -88,42 +88,41 @@ static int64_t matchlen(u_char *old, int64_t oldsize, u_char *new,
8888
return i;
8989
}
9090

91-
int64_t max_len = 0;
92-
9391
/**
9492
* Finds the longest matching array of bytes between the OLD and NEW file. The
9593
* old file is suffix-sorted; the suffix-sorted array is stored at I, and
96-
* indices to search between are indicated by ST (start) and EN (end). Returns
97-
* the length of the match, and POS is updated to the position of the match
98-
* within OLD.
94+
* indices to search between are indicated by ST (start) and EN (end). The
95+
* function does not return a value, but once a match is determined, POS is
96+
* updated to the position of the match within OLD, and MAX_LEN is set to the
97+
* match length.
9998
*/
100-
static int64_t search(int64_t *I, u_char *old, int64_t oldsize,
101-
u_char *new, int64_t newsize, int64_t st, int64_t en,
102-
int64_t *pos)
99+
static void search(int64_t *I, u_char *old, int64_t oldsize,
100+
u_char *new, int64_t newsize, int64_t st, int64_t en,
101+
int64_t *pos, int64_t *max_len)
103102
{
104103
int64_t x, y;
105104

106105
/* Initialize max_len for the binary search */
107106
if (st == 0 && en == oldsize) {
108-
max_len = matchlen(old, oldsize, new, newsize);
107+
*max_len = matchlen(old, oldsize, new, newsize);
109108
*pos = I[st];
110109
}
111110

112111
/* The binary search terminates here when "en" and "st" are adjacent
113112
* indices in the suffix-sorted array. */
114113
if (en - st < 2) {
115114
x = matchlen(old + I[st], oldsize - I[st], new, newsize);
116-
if (x > max_len) {
117-
max_len = x;
115+
if (x > *max_len) {
116+
*max_len = x;
118117
*pos = I[st];
119118
}
120119
y = matchlen(old + I[en], oldsize - I[en], new, newsize);
121-
if (y > max_len) {
122-
max_len = y;
120+
if (y > *max_len) {
121+
*max_len = y;
123122
*pos = I[en];
124123
}
125124

126-
return max_len;
125+
return;
127126
}
128127

129128
x = st + (en - st) / 2;
@@ -133,16 +132,16 @@ static int64_t search(int64_t *I, u_char *old, int64_t oldsize,
133132

134133
/* This match *could* be the longest one, so check for that here */
135134
int64_t tmp = matchlen(oldoffset, length, new, length);
136-
if (tmp > max_len) {
137-
max_len = tmp;
135+
if (tmp > *max_len) {
136+
*max_len = tmp;
138137
*pos = I[x];
139138
}
140139

141140
/* Determine how to continue the binary search */
142141
if (memcmp(oldoffset, new, length) < 0) {
143-
return search(I, old, oldsize, new, newsize, x, en, pos);
142+
return search(I, old, oldsize, new, newsize, x, en, pos, max_len);
144143
} else {
145-
return search(I, old, oldsize, new, newsize, st, x, pos);
144+
return search(I, old, oldsize, new, newsize, st, x, pos, max_len);
146145
}
147146
}
148147

@@ -617,9 +616,8 @@ int make_bsdiff_delta(char *old_filename, char *new_filename, char *delta_filena
617616
oldscore = 0;
618617

619618
for (scsc = scan += len; scan < newsize; scan++) {
620-
len =
621-
search(I, old_data, oldsize, new_data + scan, newsize - scan,
622-
0, oldsize, &pos);
619+
search(I, old_data, oldsize, new_data + scan, newsize - scan,
620+
0, oldsize, &pos, &len);
623621

624622
for (; scsc < scan + len; scsc++) {
625623
if ((scsc + lastoffset < oldsize) &&

0 commit comments

Comments
 (0)