Skip to content

Commit f9a9d47

Browse files
authored
Add logs and avoid calling init multiple times after an exception has occurred (#916)
1 parent c6b6bc5 commit f9a9d47

File tree

4 files changed

+19
-4
lines changed

4 files changed

+19
-4
lines changed

ODLA/platforms/odla_popart/odla_compute.cc

+1
Original file line number | Diff line number | Diff line change
@@ -240,6 +240,7 @@ odla_status odla_DestroyComputation(odla_computation comp) {
240240
}
241241
popart::logging::warn("reset config state, comp: {}", comp);
242242
PopartConfig::instance()->reset_init_state();
243+
popart::logging::warn("odla_DestroyComputation successfully, comp: {}", comp);
243244

244245
return ODLA_SUCCESS;
245246
}

ODLA/platforms/odla_popart/odla_popart.cc

+9-2
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,9 @@ odla_status _odla_computation::init(bool is_compile) {
199199
std::lock_guard<std::mutex> guard(init_mutex_);
200200
if (!session) {
201201
POPLAR_TRY
202+
// only continue to init the computation when no failure
203+
if (ODLA_SUCCESS != QManager::instance()->get_status())
204+
return QManager::instance()->get_status();
202205
odla_status status = set_opts();
203206
if (status != ODLA_SUCCESS) {
204207
popart::logging::err("set computation option failed");
@@ -213,7 +216,7 @@ odla_status _odla_computation::init(bool is_compile) {
213216
popart::AnchorReturnType("All"));
214217
// Acquire IPU
215218
if (opts.use_ipu_model) {
216-
popart::logging::info("Using IPU Model to run.");
219+
popart::logging::warn("Using IPU Model to run.");
217220
std::map<std::string, std::string> deviceOpts{
218221
{"numIPUs", std::to_string(opts.ipu_num)}, {"tilesPerIPU", "1216"}};
219222
device =
@@ -230,6 +233,7 @@ odla_status _odla_computation::init(bool is_compile) {
230233
throw std::runtime_error(
231234
"Failed to get a device when initializing odla_computation");
232235
}
236+
popart::logging::warn("Device acquired to run model");
233237

234238
// Create and config SessionOptions
235239
set_session_opts();
@@ -255,6 +259,9 @@ odla_status _odla_computation::init(bool is_compile) {
255259
// Create InferenceSession
256260
new_session = std::move(popart::InferenceSession::createFromOnnxModel(
257261
proto, data_flow, device, popart::InputShapeInfo(), session_opts_));
262+
popart::logging::warn(
263+
"New session: {} has been created for computation: {}",
264+
new_session.get(), this);
258265

259266
if (!is_compile) {
260267
if (PopartConfig::instance()->load_or_save_cache()) {
@@ -404,7 +411,7 @@ bool _odla_computation::hold() {
404411
} else {
405412
std::stringstream ss_holder;
406413
ss_holder << thread_id_of_holder;
407-
popart::logging::warn(
414+
popart::logging::info(
408415
"The odla_computation {} has been held by thread: {}"
409416
", when thread {} try to hold it.",
410417
this, thread_id_of_holder, this_thread_id);

ODLA/platforms/odla_popart/odla_popart.h

+8-1
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,14 @@ struct _odla_computation {
153153
// Accessor: returns the current value of the is_compile_only_ member.
inline bool is_compile_only() {
  return is_compile_only_;
}
154154
inline void release_session() {
155155
if (session != nullptr) {
156-
session->getDevice().getDeviceInfo()->detach();
156+
if (session->getDevice().getDeviceInfo() != nullptr) {
157+
popart::logging::warn("Tring to detach device for computation: {}",
158+
this);
159+
session->getDevice().getDeviceInfo()->detach();
160+
} else
161+
popart::logging::warn(
162+
"Device info is nullptr when try to detach for computation: {}",
163+
this);
157164
popart::logging::warn(
158165
"The computation:{} session:{} detached from device", this,
159166
session.get());

ODLA/platforms/odla_popart/popart_config.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ class PopartConfig {
115115
std::lock_guard<std::mutex> guard(config_mutex_);
116116
if (inited_) {
117117
inited_ = false;
118-
if (cache_fs->is_open()) {
118+
if (cache_fs && cache_fs->is_open()) {
119119
cache_fs->close();
120120
cache_fs->clear();
121121
}

0 commit comments

Comments
 (0)