Skip to content

Commit f1e6d95

Browse files
author
evanh
committed
Optimize serialization of sparse bit array
The binary Read function, while clean, was causing large performance hits by requiring a bytes Reader. Speed this up by removing the reader and parsing the bytes directly.
1 parent eb2f0c9 commit f1e6d95

File tree

4 files changed

+57
-21
lines changed

4 files changed

+57
-21
lines changed

bitarray/bitarray.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,10 @@ func (ba *bitArray) Blocks() Iterator {
232232
return newBitArrayIterator(ba)
233233
}
234234

235+
func (ba *bitArray) IsEmpty() bool {
236+
return ba.anyset
237+
}
238+
235239
// complement flips all bits in this array.
236240
func (ba *bitArray) complement() {
237241
for i := uint64(0); i < uint64(len(ba.blocks)); i++ {

bitarray/encoding.go

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -95,43 +95,64 @@ func (ba *sparseBitArray) Serialize() ([]byte, error) {
9595
return w.Bytes(), nil
9696
}
9797

98+
// Uint64FromBytes decodes the first eight bytes of b as a little-endian
// uint64. It mirrors the decoding done by the binary package, but checks the
// length first so that a short buffer cannot cause a panic.
//
// It returns the decoded value and the number of bytes consumed (always 8 on
// success). If b holds fewer than eight bytes, the value is zero and the
// byte count is -1.
func Uint64FromBytes(b []byte) (uint64, int) {
	const width = 8
	if len(b) < width {
		return 0, -1
	}

	// Assemble the low and high 32-bit halves separately, least
	// significant byte first, then join them.
	lo := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
	hi := uint32(b[4]) | uint32(b[5])<<8 | uint32(b[6])<<16 | uint32(b[7])<<24
	return uint64(hi)<<32 | uint64(lo), width
}
111+
98112
// Deserialize takes the incoming byte slice, and populates the sparseBitArray
99113
// with data in the bytes. Note that this will overwrite any capacity
100114
// specified when creating the sparseBitArray. Also note that if an error
101115
// is returned, the sparseBitArray this is called on might be populated
102116
// with partial data.
103117
func (ret *sparseBitArray) Deserialize(incoming []byte) error {
104-
r := bytes.NewReader(incoming[1:]) // Discard identifier
118+
var intsize = uint64(s / 8)
119+
var curLoc = uint64(1) // Ignore the identifier byte
105120

106121
var intsToRead uint64
107-
err := binary.Read(r, binary.LittleEndian, &intsToRead)
108-
if err != nil {
109-
return err
110-
}
111-
112-
var nextblock block
113-
for i := intsToRead; i > uint64(0); i-- {
114-
err = binary.Read(r, binary.LittleEndian, &nextblock)
115-
if err != nil {
116-
return err
122+
var bytesRead int
123+
intsToRead, bytesRead = Uint64FromBytes(incoming[curLoc : curLoc+intsize])
124+
if bytesRead < 0 {
125+
return errors.New("Invalid data for BitArray")
126+
}
127+
curLoc += intsize
128+
129+
var nextblock uint64
130+
ret.blocks = make([]block, intsToRead)
131+
for i := uint64(0); i < intsToRead; i++ {
132+
nextblock, bytesRead = Uint64FromBytes(incoming[curLoc : curLoc+intsize])
133+
if bytesRead < 0 {
134+
return errors.New("Invalid data for BitArray")
117135
}
118-
ret.blocks = append(ret.blocks, nextblock)
136+
ret.blocks[i] = block(nextblock)
137+
curLoc += intsize
119138
}
120139

121-
err = binary.Read(r, binary.LittleEndian, &intsToRead)
122-
if err != nil {
123-
return err
140+
intsToRead, bytesRead = Uint64FromBytes(incoming[curLoc : curLoc+intsize])
141+
if bytesRead < 0 {
142+
return errors.New("Invalid data for BitArray")
124143
}
144+
curLoc += intsize
125145

126146
var nextuint uint64
127-
for i := intsToRead; i > uint64(0); i-- {
128-
err = binary.Read(r, binary.LittleEndian, &nextuint)
129-
if err != nil {
130-
return err
147+
ret.indices = make(uintSlice, intsToRead)
148+
for i := uint64(0); i < intsToRead; i++ {
149+
nextuint, bytesRead = Uint64FromBytes(incoming[curLoc : curLoc+intsize])
150+
if bytesRead < 0 {
151+
return errors.New("Invalid data for BitArray")
131152
}
132-
ret.indices = append(ret.indices, nextuint)
153+
ret.indices[i] = nextuint
154+
curLoc += intsize
133155
}
134-
135156
return nil
136157
}
137158

bitarray/interface.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,14 @@ type BitArray interface {
6161
// And will bitwise and the two bitarrays and return a new bitarray
6262
// representing the result.
6363
And(other BitArray) BitArray
64+
// Nand will bitwise nand the two bitarrays and return a new bitarray
65+
// representing the result.
66+
Nand(other BitArray) BitArray
6467
// ToNums converts this bit array to the list of numbers contained
6568
// within it.
6669
ToNums() []uint64
70+
// IsEmpty returns true if no values are set on the bitarray.
71+
IsEmpty() bool
6772
}
6873

6974
// Iterator defines methods used to iterate over a bit array.

bitarray/sparse_bitarray.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,12 @@ func (sba *sparseBitArray) Nand(other BitArray) BitArray {
255255
return nandSparseWithDenseBitArray(sba, other.(*bitArray))
256256
}
257257

258+
func (sba *sparseBitArray) IsEmpty() bool {
259+
// This works because the and, nand and delete functions only
260+
// keep values that have a non-zero block.
261+
return len(sba.indices) == 0
262+
}
263+
258264
func (sba *sparseBitArray) copy() *sparseBitArray {
259265
blocks := make(blocks, len(sba.blocks))
260266
copy(blocks, sba.blocks)

0 commit comments

Comments
 (0)