Skip to content

Commit c49021c

Browse files
Some first performance improvements (#26)
Some very simple (but helpful) performance improvements that speed up the signal calculation by roughly 3x.
1 parent f2afc6c commit c49021c

8 files changed

+186
-36
lines changed

src/CylindricalWeightingFieldCalculator.cxx

+1-13
Original file line number | Diff line number | Diff line change
@@ -384,20 +384,14 @@ void CylindricalWeightingFieldCalculator::Calculate(std::filesystem::path outdir
384384
}
385385

386386
cld.ind_t = stepcnt++;
387-
std::cout << "entering saving chunkloop" << std::endl;
388387
m_f -> loop_in_chunks(meep::eisvogel_saving_chunkloop, static_cast<void*>(&cld), m_f -> total_volume());
389-
std::cout << "exit saving chunkloop" << std::endl;
390388

391389
if((stepcnt % 400) == 0) {
392-
std::cout << "start chunk merging" << std::endl;
393390
fstor -> MergeChunks(0, 400);
394-
std::cout << "end chunk merging" << std::endl;
395391
}
396392
}
397393

398-
std::cout << "start chunk merging" << std::endl;
399394
fstor -> MergeChunks(0, 400);
400-
std::cout << "end chunk merging" << std::endl;
401395

402396
// TODO: again, will get better once the three separate arrays are gone
403397
// TODO: for large weighting fields, will have to move chunks to the permanent location continuously throughout the calculation so as not to fill up local storage
@@ -418,13 +412,7 @@ void CylindricalWeightingFieldCalculator::Calculate(std::filesystem::path outdir
418412

419413
if(meep::am_master()) {
420414
std::shared_ptr<CylindricalWeightingField> cwf = std::make_shared<CylindricalWeightingField>(outdir, *m_start_coords, *m_end_coords);
421-
cwf -> MakeMetadataPersistent();
422-
423-
// Sometimes need to wait for all files to show up?
424-
// std::this_thread::sleep_for(std::chrono::milliseconds(1000));
425-
426-
std::cout << "start defragmentation" << std::endl;
415+
cwf -> MakeMetadataPersistent();
427416
cwf -> RebuildChunks(requested_chunk_size);
428-
std::cout << "end defragmentation" << std::endl;
429417
}
430418
}

src/DefaultSerializationTraits.hxx

+4-4
Original file line number | Diff line number | Diff line change
@@ -111,10 +111,10 @@ namespace stor {
111111
}
112112
};
113113

114-
template<std::size_t n>
115-
struct Traits<std::array<float, n>> {
114+
// template<std::size_t n>
115+
// struct Traits<std::array<float, n>> {
116116

117-
};
117+
// };
118118

119119
// For general vectors
120120
template <typename T>
@@ -192,7 +192,7 @@ namespace stor {
192192

193193
type retval;
194194
for(std::size_t ind = 0; ind < keys.size(); ind++) {
195-
retval[keys[ind]] = values[ind];
195+
retval.insert(retval.end(), std::pair{keys[ind], values[ind]});
196196
}
197197

198198
return retval;

src/DenseNDArray.hh

+15
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,21 @@ public:
5757

5858
return retval;
5959
}
60+
61+
static DenseNDArray<T, dims> FromSparseFile(std::iostream& stream) {
62+
T default_value = stor::Traits<T>::deserialize(stream);
63+
shape_t shape = stor::Traits<shape_t>::deserialize(stream);
64+
DenseNDArray<T, dims> retval(shape, default_value);
65+
66+
std::vector<std::array<std::size_t, dims>> keys = stor::Traits<std::vector<std::array<std::size_t, dims>>>::deserialize(stream);
67+
std::vector<T> values = stor::Traits<std::vector<T>>::deserialize(stream);
68+
69+
for(std::size_t el_ind = 0; el_ind < keys.size(); el_ind++) {
70+
retval(keys[el_ind]) = values[el_ind];
71+
}
72+
73+
return retval;
74+
}
6075

6176
DenseNDArray(const shape_t& shape, std::vector<T>&& data) : NDArray<T, dims>(shape) {
6277
m_strides[0] = 1;

src/DistributedNDArray.hh

+2
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,8 @@ private:
108108
// can think about turning this into a class
109109
index_t m_chunk_index;
110110

111+
std::size_t m_chunk_last_accessed;
112+
111113
// The index may not start at {0, 0, 0}
112114
IndexVector m_global_start_ind;
113115

src/DistributedNDArray.hxx

+29-12
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ namespace stor {
3030
template <class T, std::size_t dims, template<class, std::size_t> class DenseT, template<class, std::size_t> class SparseT, class SerializerT>
3131
DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::DistributedNDArray(std::string dirpath, std::size_t max_cache_size, SerializerT& ser) :
3232
NDArray<T, dims>(), m_dirpath(dirpath), m_indexpath(dirpath + "/index.bin"), m_max_cache_size(max_cache_size),
33-
m_global_start_ind(dims, 0), m_ser(ser) {
33+
m_chunk_last_accessed(0), m_global_start_ind(dims, 0), m_ser(ser) {
3434

3535
// Create directory if it does not already exist
3636
if(!std::filesystem::exists(m_dirpath)) {
@@ -104,7 +104,8 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::WriteChunk(const
104104
std::size_t num_elems = chunk.volume();
105105

106106
// pays off to store as sparse chunk
107-
std::size_t sparse_vs_dense_expense_ratio = 3; // sparse storage is approximately 3x as expensive as dense storage per nonzero element
107+
// std::size_t sparse_vs_dense_expense_ratio = 3; // when only counting storage space: sparse storage is approximately 3x as expensive as dense storage per nonzero element
108+
std::size_t sparse_vs_dense_expense_ratio = 20; // when also counting complexity of deserializing + rebuilding a dense chunk
108109
if(sparse_vs_dense_expense_ratio * num_nonzero_elems < num_elems) {
109110

110111
std::cout << "going to sparsify" << std::endl;
@@ -232,15 +233,15 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::RebuildChunks(co
232233
dense_t current_chunk = retrieveChunk(chunk_index);
233234

234235
if(actual_chunk_shape == current_chunk.shape()) {
235-
std::cout << "chunk already has the correct size, keep it" << std::endl;
236+
// std::cout << "chunk already has the correct size, keep it" << std::endl;
236237
chunks_to_keep.push_back(m_chunk_index[chunk_index]);
237238
continue;
238239
}
239240

240-
std::cout << "now working on rebuild chunk with inds" << std::endl;
241-
std::cout << "chunk_inds_start = " << std::endl;
241+
// std::cout << "now working on rebuild chunk with inds" << std::endl;
242+
// std::cout << "chunk_inds_start = " << std::endl;
242243
chunk_inds_start.print();
243-
std::cout << "chunk_inds_end = " << std::endl;
244+
// std::cout << "chunk_inds_end = " << std::endl;
244245
chunk_inds_end.print();
245246

246247
dense_t chunk = range(chunk_inds_start, chunk_inds_end);
@@ -436,11 +437,26 @@ bool DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::chunkContainsInd
436437

437438
template <class T, std::size_t dims, template<class, std::size_t> class DenseT, template<class, std::size_t> class SparseT, class SerializerT>
438439
std::size_t DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::getChunkIndex(const IndexVector& inds) {
439-
std::size_t chunk_ind = 0;
440-
for(chunk_ind = 0; chunk_ind < m_chunk_index.size(); chunk_ind++) {
441-
if(chunkContainsInds(m_chunk_index[chunk_ind], inds)) {
442-
return chunk_ind;
443-
}
440+
441+
if(m_chunk_index.size() == 0) {
442+
[[unlikely]];
443+
throw ChunkNotFoundError();
444+
}
445+
446+
if(chunkContainsInds(m_chunk_index[m_chunk_last_accessed], inds)) {
447+
[[likely]];
448+
return m_chunk_last_accessed;
449+
}
450+
else {
451+
// Trigger a full chunk lookup
452+
// TODO: have a search tree here with logarithmic instead of linear complexity
453+
std::size_t chunk_ind = 0;
454+
for(chunk_ind = 0; chunk_ind < m_chunk_index.size(); chunk_ind++) {
455+
if(chunkContainsInds(m_chunk_index[chunk_ind], inds)) {
456+
m_chunk_last_accessed = chunk_ind;
457+
return chunk_ind;
458+
}
459+
}
444460
}
445461

446462
std::cout << "HHHHHHH" << std::endl;
@@ -476,7 +492,8 @@ DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::dense_t& DistributedN
476492
m_chunk_cache.insert({chunk_ind, m_ser.template deserialize<dense_t>(ifs)});
477493
}
478494
else if(meta.chunk_type == ChunkType::sparse) {
479-
m_chunk_cache.insert({chunk_ind, dense_t::From(m_ser.template deserialize<sparse_t>(ifs))});
495+
m_chunk_cache.insert({chunk_ind, dense_t::FromSparseFile(ifs)});
496+
// m_chunk_cache.insert({chunk_ind, dense_t::From(m_ser.template deserialize<sparse_t>(ifs))});
480497
}
481498
else {
482499
throw std::runtime_error("Error: unknown chunk type encountered!");

src/NDArrayOperations.hh

+4-4
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ namespace NDArrayOps {
1010
template <class T, std::size_t dims>
1111
DenseNDArray<T, dims> concatenate(const DenseNDArray<T, dims>& arr_1, const DenseNDArray<T, dims>& arr_2, std::size_t axis) {
1212

13-
std::cout << " ---> START CONCATENATE <---" << std::endl;
13+
// std::cout << " ---> START CONCATENATE <---" << std::endl;
1414

1515
if(axis >= dims) {
1616
throw std::runtime_error("Error: 'axis' out of bounds");
@@ -39,7 +39,7 @@ namespace NDArrayOps {
3939

4040
DenseNDArray<T, dims> retval(final_shape_crutch, 0.0);
4141

42-
std::cout << " ---> MIGRATE ARR_1 <---" << std::endl;
42+
// std::cout << " ---> MIGRATE ARR_1 <---" << std::endl;
4343

4444
// Migrate contents of arr_1
4545
{
@@ -51,7 +51,7 @@ namespace NDArrayOps {
5151
}
5252
}
5353

54-
std::cout << " ---> MIGRATE ARR_2 <---" << std::endl;
54+
// std::cout << " ---> MIGRATE ARR_2 <---" << std::endl;
5555

5656
// Migrate contents of arr_2
5757
{
@@ -68,7 +68,7 @@ namespace NDArrayOps {
6868
}
6969
}
7070

71-
std::cout << " ---> FINISH CONCATENATE <---" << std::endl;
71+
// std::cout << " ---> FINISH CONCATENATE <---" << std::endl;
7272

7373
return retval;
7474
}

src/SparseNDArray.hh

+69
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
#include <array>
55
#include <map>
6+
#include <chrono>
67
#include "NDArray.hh"
78
#include "DenseNDArray.hh"
89
#include "Eisvogel/IteratorUtils.hh"
@@ -108,6 +109,74 @@ namespace stor {
108109
};
109110
}
110111

112+
// namespace stor {
113+
114+
// template<typename T, std::size_t dims>
115+
// struct Traits<SparseNDArray<T, dims>> {
116+
// using type = SparseNDArray<T, dims>;
117+
// using shape_t = typename type::shape_t;
118+
// using data_t = typename type::data_t;
119+
120+
// static void serialize(std::iostream& stream, const type& val) {
121+
// Traits<T>::serialize(stream, val.m_default_value);
122+
// Traits<shape_t>::serialize(stream, val.m_shape);
123+
124+
// // Convert array data from std::map<index, value> into two 1-dimensional vectors:
125+
// // (index_1, index_2, ...) with total length of `index_len`, and
126+
// // (value_1, value_2, ...) with total length of `number_entries`
127+
// std::size_t number_entries = val.m_data.size();
128+
// std::size_t index_len = dims * number_entries;
129+
130+
// std::vector<std::size_t> index_vec(index_len);
131+
// std::vector<T> data_vec(number_entries);
132+
133+
// // Fill the two vectors ...
134+
// auto it_index_vec = index_vec.begin();
135+
// auto it_data_vec = data_vec.begin();
136+
// for (auto const& [key, val] : val.m_data) {
137+
// std::copy(key.cbegin(), key.cend(), it_index_vec);
138+
// *it_data_vec = val;
139+
140+
// std::advance(it_data_vec, 1);
141+
// std::advance(it_index_vec, dims);
142+
// }
143+
144+
// // ... and serialize them
145+
// Traits<std::vector<std::size_t>>::serialize(stream, index_vec);
146+
// Traits<std::vector<T>>::serialize(stream, data_vec);
147+
// }
148+
149+
// static type deserialize(std::iostream& stream) {
150+
151+
// std::chrono::high_resolution_clock::time_point t_start = std::chrono::high_resolution_clock::now();
152+
153+
// T default_value = Traits<T>::deserialize(stream);
154+
// shape_t shape = Traits<shape_t>::deserialize(stream);
155+
156+
// std::vector<std::size_t> index_vec = Traits<std::vector<std::size_t>>::deserialize(stream);
157+
// std::vector<T> data_vec = Traits<std::vector<T>>::deserialize(stream);
158+
159+
// // Fill `data` map from `index_vec` and `data_vec` ...
160+
// data_t data;
161+
// auto it_index_vec = index_vec.begin();
162+
// auto it_data_vec = data_vec.begin();
163+
// std::array<std::size_t, dims> cur_ind;
164+
// while(it_data_vec != data_vec.end()) {
165+
// std::copy_n(it_index_vec, dims, cur_ind.begin());
166+
// data.insert(data.end(), std::pair{cur_ind, *it_data_vec});
167+
168+
// std::advance(it_data_vec, 1);
169+
// std::advance(it_index_vec, dims);
170+
// }
171+
172+
// // ... and build the sparse array
173+
// SparseNDArray<T, dims> retval(shape, default_value);
174+
// retval.m_data = data;
175+
// return retval;
176+
// }
177+
// };
178+
// }
179+
111180
// Some type shortcuts
112181
template <class T>
113182
using SparseScalarField3D = SparseNDArray<T, 3>;

tests/io/testSerialization.cxx

+62-3
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
#include "Serialization.hh"
22
#include "Eisvogel/Common.hh"
3+
#include "DenseNDArray.hh"
4+
#include "SparseNDArray.hh"
35

46
#include <fstream>
57
#include <iostream>
@@ -37,7 +39,7 @@ int test_serialization_vector(std::string ser_path, std::size_t length) {
3739
std::chrono::high_resolution_clock::time_point t_end = std::chrono::high_resolution_clock::now();
3840

3941
std::chrono::duration<double> time_span = duration_cast<std::chrono::duration<double>>(t_end - t_start);
40-
std::cout << "Completed in " << time_span.count() << " seconds." << std::endl;
42+
std::cout << "std::vector --> Completed in " << time_span.count() << " seconds." << std::endl;
4143

4244
for(std::size_t ind = 0; ind < length; ind++) {
4345
if(vec[ind] != res[ind]) {
@@ -48,8 +50,65 @@ int test_serialization_vector(std::string ser_path, std::size_t length) {
4850
return 0;
4951
}
5052

53+
template <std::size_t dims>
54+
int test_serialization_sparse_array(std::string ser_path, std::size_t size) {
55+
56+
// Fill random sparse array
57+
std::array<std::size_t, dims> shape;
58+
for(std::size_t dim = 0; dim < dims; dim++) {
59+
shape[dim] = size;
60+
}
61+
DenseNDArray<scalar_t, dims> darr(shape, 1.0);
62+
63+
auto to_keep = [](float value) -> bool {
64+
return value != 0.0;
65+
};
66+
SparseNDArray<scalar_t, dims> sparr = SparseNDArray<scalar_t, dims>::From(darr, to_keep, 0.0);
67+
68+
std::chrono::high_resolution_clock::time_point t_start = std::chrono::high_resolution_clock::now();
69+
70+
std::fstream ofs;
71+
ofs.open(ser_path, std::ios::out | std::ios::binary);
72+
stor::DefaultSerializer oser;
73+
oser.serialize(ofs, sparr);
74+
ofs.close();
75+
76+
std::chrono::high_resolution_clock::time_point t_end = std::chrono::high_resolution_clock::now();
77+
std::chrono::duration<double> time_span = duration_cast<std::chrono::duration<double>>(t_end - t_start);
78+
std::cout << "SparseNDArray --> Serialization completed in " << time_span.count() << " seconds." << std::endl;
79+
80+
t_start = std::chrono::high_resolution_clock::now();
81+
82+
std::fstream ifs;
83+
ifs.open(ser_path, std::ios::in | std::ios::binary);
84+
stor::DefaultSerializer iser;
85+
SparseNDArray<scalar_t, dims> sparr_read = iser.deserialize<SparseNDArray<scalar_t, dims>>(ifs);
86+
ifs.close();
87+
88+
t_end = std::chrono::high_resolution_clock::now();
89+
time_span = duration_cast<std::chrono::duration<double>>(t_end - t_start);
90+
std::cout << "SparseNDArray --> Deserialization completed in " << time_span.count() << " seconds." << std::endl;
91+
92+
DenseNDArray<scalar_t, dims> darr_read = DenseNDArray<scalar_t, dims>::From(sparr_read);
93+
94+
IndexVector start_inds(dims, 0);
95+
IndexVector end_inds = darr_read.shape();
96+
for(IndexCounter cnt(start_inds, end_inds); cnt.running(); ++cnt) {
97+
IndexVector cur_ind = cnt.index();
98+
if(darr(cur_ind) != darr_read(cur_ind)) {
99+
throw std::runtime_error("Error: mistake");
100+
}
101+
}
102+
103+
std::cout << "Test passed" << std::endl;
104+
105+
return 0;
106+
}
107+
51108
int main(int argc, char* argv[]) {
52109

53-
std::string ser_path = "ser_test.bin";
54-
test_serialization_vector(ser_path, 1e6);
110+
std::string ser_path = "/tmp/ser_test.bin";
111+
112+
// test_serialization_vector(ser_path, 1e6);
113+
test_serialization_sparse_array<3>(ser_path, 200);
55114
}

0 commit comments

Comments (0)