Skip to content

Commit 966f59d

Browse files
remove some debug printouts
1 parent 3d45f39 commit 966f59d

File tree

1 file changed

+61
-15
lines changed

1 file changed

+61
-15
lines changed

src/DistributedNDArray.hxx

+61-15
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,10 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::WriteChunk(const
8787
char uuid_string[36];
8888
uuid_unparse(uuid_binary, uuid_string);
8989
std::string chunk_filename = std::string(uuid_string) + ".bin";
90+
91+
// if(chunk_filename.find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_-.") != std::string::npos) {
92+
// throw std::runtime_error("Error: non-compliant chunk filename detected!");
93+
// }
9094

9195
IndexVector stop_ind = start_ind + chunk.shape();
9296

@@ -108,21 +112,21 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::WriteChunk(const
108112
std::size_t sparse_vs_dense_expense_ratio = 20; // when also counting complexity of deserializing + rebuilding a dense chunk
109113
if(sparse_vs_dense_expense_ratio * num_nonzero_elems < num_elems) {
110114

111-
std::cout << "going to sparsify" << std::endl;
115+
// std::cout << "going to sparsify" << std::endl;
112116

113117
auto to_keep = [](scalar_t value) -> bool {
114118
return value != 0.0;
115119
};
116120
sparse_t sparse_chunk = sparse_t::From(chunk, to_keep, 0.0);
117121
meta.chunk_type = ChunkType::sparse;
118122

119-
std::cout << "after sparsification, " << sparse_chunk.NumEntries() << " entries remain" << std::endl;
123+
// std::cout << "after sparsification, " << sparse_chunk.NumEntries() << " entries remain" << std::endl;
120124

121125
m_ser.template serialize<ChunkMetadata>(ofs, meta);
122126
m_ser.template serialize<sparse_t>(ofs, sparse_chunk);
123127
}
124128
else {
125-
std::cout << "store as dense" << std::endl;
129+
// std::cout << "store as dense" << std::endl;
126130
m_ser.template serialize<ChunkMetadata>(ofs, meta);
127131
m_ser.template serialize<dense_t>(ofs, chunk);
128132
}
@@ -164,6 +168,12 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::rebuildIndex() {
164168
ifs.open(dir_entry.path(), std::ios::in | std::ios::binary);
165169
ChunkMetadata meta = m_ser.template deserialize<ChunkMetadata>(ifs);
166170
ifs.close();
171+
172+
// std::cout << "found metadata containing filename: '" + meta.filename + "' from path '" + std::string(dir_entry.path()) + "'!" << std::endl;
173+
174+
// if(meta.filename.find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_-.") != std::string::npos) {
175+
// throw std::runtime_error("Error: found metadata containing non-compliant filename: '" + meta.filename + "' from path '" + std::string(dir_entry.path()) + "'!");
176+
// }
167177

168178
m_chunk_index.push_back(meta);
169179
}
@@ -187,18 +197,18 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::RebuildChunks(co
187197
// 2) write the individual chunks, but don't touch the index (using `WriteChunk`)
188198
// 3) remove all chunks in the index (-> this will remove all the old ones), BUT not those that haven't been touched because they are already conforming to the correct size
189199

190-
std::cout << "in RebuildChunks" << std::endl;
200+
// std::cout << "in RebuildChunks" << std::endl;
191201

192202
if(requested_chunk_size.size() != dims) {
193203
throw std::runtime_error("Error: requested chunk size has wrong dimensionality!");
194204
}
195205

196-
std::cout << "rebuilding index" << std::endl;
206+
// std::cout << "rebuilding index" << std::endl;
197207

198208
// Make sure we start from a clean index
199209
rebuildIndex();
200210

201-
std::cout << "rebuilt index" << std::endl;
211+
// std::cout << "rebuilt index" << std::endl;
202212

203213
if(!isGloballyContiguous(getGlobalStartInd(), getGlobalStopInd())) {
204214
throw std::runtime_error("Error: refusing to rebuild chunks for a non-contiguous array!");
@@ -208,14 +218,14 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::RebuildChunks(co
208218

209219
IndexVector global_shape(this -> m_shape);
210220

211-
std::cout << "global shape" << std::endl;
212-
global_shape.print();
221+
// std::cout << "global shape" << std::endl;
222+
// global_shape.print();
213223

214224
IndexVector number_required_chunks = (global_shape + requested_chunk_size - 1) / requested_chunk_size;
215225

216-
std::cout << "will have " << std::endl;
217-
number_required_chunks.print();
218-
std::cout << " chunks after rebuilding" << std::endl;
226+
// std::cout << "will have " << std::endl;
227+
// number_required_chunks.print();
228+
// std::cout << " chunks after rebuilding" << std::endl;
219229

220230
index_t chunks_to_keep;
221231

@@ -279,18 +289,20 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::MergeChunks(std:
279289

280290
// 3) again, have the to_keep mechanism
281291

282-
std::cout << "in MergeNeighbouringChunks" << std::endl;
292+
// std::cout << "in MergeNeighbouringChunks" << std::endl;
283293

284294
rebuildIndex();
285295

286-
std::cout << "rebuilt index" << std::endl;
296+
// std::cout << "rebuilt index" << std::endl;
287297

288298
if(!isGloballyContiguous(getGlobalStartInd(), getGlobalStopInd())) {
289299
throw std::runtime_error("Error: refusing to merge chunks for a non-contiguous array!");
290300
}
291301

292302
calculateShape();
293303

304+
// std::cout << "have a total of " << m_chunk_index.size() << " chunks before merging" << std::endl;
305+
294306
// put chunks in order along the merging axis
295307
std::vector<std::size_t> chunk_order(m_chunk_index.size());
296308
std::iota(chunk_order.begin(), chunk_order.end(), 0);
@@ -326,9 +338,15 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::MergeChunks(std:
326338
chunks_to_merge.push_back(retrieveChunk(cur_chunk_index));
327339
std::size_t output_chunk_shape = chunks_to_merge.back().shape(dim_to_merge);
328340
std::size_t neighbour_chunk_index = cur_chunk_index;
341+
342+
// std::cout << "start merging tracer from chunk with index = " << neighbour_chunk_index << std::endl;
343+
329344
while(true) {
330345
try {
331346
neighbour_chunk_index = getNeighbouringChunkIndex(neighbour_chunk_index, dim_to_merge);
347+
348+
// std::cout << "have neighbouring chunk with index = " << neighbour_chunk_index << " / " << m_chunk_index.size() << std::endl;
349+
332350
chunks_to_merge.push_back(retrieveChunk(neighbour_chunk_index));
333351
std::erase(chunk_order, neighbour_chunk_index);
334352

@@ -409,12 +427,24 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::printChunks() {
409427
template <class T, std::size_t dims, template<class, std::size_t> class DenseT, template<class, std::size_t> class SparseT, class SerializerT>
410428
std::size_t DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::getNeighbouringChunkIndex(std::size_t chunk_index, std::size_t dim) {
411429

430+
if(chunk_index > m_chunk_index.size()) {
431+
throw std::runtime_error("Trying to look for neighbours of out-of-bounds chunk: " + std::to_string(chunk_index) + "/" + std::to_string(m_chunk_index.size()));
432+
}
433+
412434
ChunkMetadata& chunk_meta = m_chunk_index[chunk_index];
413435
IndexVector chunk_size_along_dim(dims, 0);
414436
chunk_size_along_dim(dim) = chunk_meta.stop_ind(dim) - chunk_meta.start_ind(dim);
415437
IndexVector neighbour_chunk_start_ind = chunk_meta.start_ind + chunk_size_along_dim;
416438

417-
return getChunkIndex(neighbour_chunk_start_ind);
439+
std::size_t neighbour_chunk_ind = getChunkIndex(neighbour_chunk_start_ind);
440+
441+
// if(neighbour_chunk_ind > m_chunk_index.size()) {
442+
// std::cout << "MMMM Found out-of-bounds neighbouring chunk: " + std::to_string(neighbour_chunk_ind) + "/" + std::to_string(m_chunk_index.size()) << std::endl;
443+
// std::cout << "MMMM From request with neighbour_chunk_start_ind = " << std::endl;
444+
// neighbour_chunk_start_ind.print();
445+
// }
446+
447+
return neighbour_chunk_ind;
418448
}
419449

420450
template <class T, std::size_t dims, template<class, std::size_t> class DenseT, template<class, std::size_t> class SparseT, class SerializerT>
@@ -472,15 +502,18 @@ std::size_t DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::getChunkI
472502

473503
if(chunkContainsInds(m_chunk_index[m_chunk_last_accessed], inds)) {
474504
[[likely]];
505+
// std::cout << "NNN m_chunk_last_accessed gave " << m_chunk_last_accessed << std::endl;
475506
return m_chunk_last_accessed;
476507
}
477508
else {
478509
// Trigger a full chunk lookup
479510
// TODO: have a search tree here with logarithmic instead of linear complexity
511+
// std::cout << "NNN trigger full chunk lookup" << std::endl;
480512
std::size_t chunk_ind = 0;
481513
for(chunk_ind = 0; chunk_ind < m_chunk_index.size(); chunk_ind++) {
482514
if(chunkContainsInds(m_chunk_index[chunk_ind], inds)) {
483-
m_chunk_last_accessed = chunk_ind;
515+
m_chunk_last_accessed = chunk_ind;
516+
// std::cout << "NNN full chunk lookup gave " << chunk_ind << std::endl;
484517
return chunk_ind;
485518
}
486519
}
@@ -496,6 +529,10 @@ std::size_t DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::getChunkI
496529

497530
template <class T, std::size_t dims, template<class, std::size_t> class DenseT, template<class, std::size_t> class SparseT, class SerializerT>
498531
DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::dense_t& DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::retrieveChunk(std::size_t chunk_ind) {
532+
533+
if(chunk_ind > m_chunk_index.size()) {
534+
throw std::runtime_error("Trying to retrieve out-of-bounds chunk: " + std::to_string(chunk_ind) + "/" + std::to_string(m_chunk_index.size()));
535+
}
499536

500537
ChunkMetadata& chunk_meta = m_chunk_index[chunk_ind];
501538

@@ -512,6 +549,15 @@ DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::dense_t& DistributedN
512549
std::fstream ifs;
513550
std::string chunk_path = m_dirpath + "/" + chunk_meta.filename;
514551
std::cout << "Loading chunk from " + chunk_path + " ... ";
552+
553+
// if(chunk_meta.filename.find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_-.") != std::string::npos) {
554+
// throw std::runtime_error("Error: trying to open chunk " + std::to_string(chunk_ind) + "/" + std::to_string(m_chunk_index.size()) + " from file with non-compliant name: '" + chunk_meta.filename + "'!");
555+
// }
556+
557+
if(!std::filesystem::exists(chunk_path)) {
558+
throw std::runtime_error("Error: trying to access non-existant file '" + chunk_path + "'!");
559+
}
560+
515561
ifs.open(chunk_path, std::ios::in | std::ios::binary);
516562
ChunkMetadata meta = m_ser.template deserialize<ChunkMetadata>(ifs);
517563

0 commit comments

Comments
 (0)