Skip to content

Commit e0f07fa

Browse files
yanwei-gryanwei
and
yanwei
authored
cherry pick the CreateExecutable updates (#671)
* close floating point check in popart * create pipeline resource when the computation is created, not at the time the cache computation item is set; fix pixel bert detect application runtime error * add weiming build script fix * add implementation of CreateExecutable: move compile_and_run to CreateExecutable, and add more error handling logic Co-authored-by: yanwei <yw01041751@alibaba-inc.com>
1 parent 5cf3b6f commit e0f07fa

File tree

5 files changed

+152
-75
lines changed

5 files changed

+152
-75
lines changed

ODLA/platforms/odla_popart/odla_compute.cc

+28-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ odla_status odla_SetComputationItem(odla_computation comp, odla_item_type type,
6464
case 1001: // load cache directly, need set path of cache file
6565
PopartConfig::instance()->set_load_cache(true);
6666
PopartConfig::instance()->set_cache_path(reinterpret_cast<char*>(value));
67-
PopartConfig::instance()->extract_config_from_cache();
6867
break;
6968
default:
7069
std::cerr << "Unsupported property type: " << type << std::endl;
@@ -73,6 +72,26 @@ odla_status odla_SetComputationItem(odla_computation comp, odla_item_type type,
7372
return ODLA_SUCCESS;
7473
}
7574

75+
/**
 * Compile the computation and export it to the configured cache file.
 *
 * If the computation already owns a session it is exported directly;
 * otherwise the singleton computation is initialised in compile mode
 * (`init(true)`) and then exported.
 *
 * @param executable Not read or written by this implementation.
 * @param context    Not used by this implementation.
 * @param comp       Computation to compile; must not be null.
 * @return ODLA_FAILURE when `comp` is null, otherwise the status reported by
 *         `compile_and_export()`.
 */
odla_status odla_CreateExecutable(odla_executable* executable,
                                  odla_context context, odla_computation comp) {
  popart::logging::info("Start to create Executable...");
  if (comp == nullptr) {
    popart::logging::err(
        "Failed to create Executable... Computation haven't been intialized.");
    return ODLA_FAILURE;
  }

  // A session already exists, so only the export step is needed.
  if (comp->session) {
    return comp->compile_and_export();
  }

  // No session yet: initialise with is_compile = true, then export.
  // NOTE(review): this path goes through the singleton rather than `comp`;
  // presumably they are the same object - confirm against the callers.
  _odla_computation::instance()->init(true);

  // Propagate the export status instead of unconditionally reporting success,
  // so a failed compile/export is visible to the caller.
  return _odla_computation::instance()->compile_and_export();
}
94+
7695
odla_status odla_StoreExecutable(const odla_char* file_name,
7796
odla_executable executable) {
7897
return ODLA_SUCCESS;
@@ -97,6 +116,13 @@ odla_status odla_CreateComputation(odla_computation* comp) {
97116
}
98117
// Read the config file
99118
if (!PopartConfig::instance()->inited()) {
119+
if (PopartConfig::instance()->load_cache()) {
120+
odla_status ret = PopartConfig::instance()->extract_config_from_cache();
121+
if (ret == ODLA_FAILURE) {
122+
popart::logging::err("load config from cache failed");
123+
return ret;
124+
}
125+
}
100126
PopartConfig::instance()->load_config(std::getenv("ODLA_POPART_CONFIG"));
101127
}
102128
_odla_computation::instance()->set_executor();
@@ -106,6 +132,7 @@ odla_status odla_CreateComputation(odla_computation* comp) {
106132
QManager::instance()->getQ()->init(
107133
PopartConfig::instance()->queue_capacity());
108134
}
135+
109136
return ODLA_SUCCESS;
110137
}
111138

ODLA/platforms/odla_popart/odla_popart.cc

+78-46
Original file line numberDiff line numberDiff line change
@@ -35,36 +35,6 @@
3535
_odla_computation* _odla_computation::instance_ = nullptr;
3636
std::mutex _odla_computation::comp_mutex_;
3737

38-
void compile_and_export_cache(std::string catch_file_name,
39-
std::string config_file_name) {
40-
std::fstream catch_fs(catch_file_name,
41-
std::ios_base::out | std::ifstream::binary);
42-
std::fstream config_fs;
43-
std::string config_string;
44-
if (config_file_name.size() > 0) {
45-
config_fs.open(config_file_name, std::ios_base::in | std::ifstream::binary);
46-
if (!config_fs.is_open()) {
47-
popart::logging::warn(
48-
"invalid config file name:[ {} ] will use default config",
49-
config_file_name);
50-
config_string = PopartConfig::instance()->get_default_config_string();
51-
}
52-
std::ostringstream config_ss;
53-
config_ss << config_fs.rdbuf();
54-
config_string = config_ss.str();
55-
} else {
56-
config_string = PopartConfig::instance()->get_default_config_string();
57-
}
58-
59-
int config_size = config_string.size();
60-
catch_fs.write((char*)&config_size, sizeof(config_string.size()));
61-
catch_fs.write(config_string.c_str(), config_string.size());
62-
63-
_odla_computation::instance()->session->compileAndExport(catch_fs.flush());
64-
catch_fs.flush();
65-
catch_fs.close();
66-
}
67-
6838
void compute_loop(odla_computation comp) {
6939
// setup the stepio with allbacks
7040
popart::StepIOCallback stepio(input_callback, input_complete_callback,
@@ -119,7 +89,59 @@ void compute_loop(odla_computation comp) {
11989
comp->thread_done();
12090
}
12191

122-
void _odla_computation::init() {
92+
/**
 * Compile the session and export it, together with its config, to the cache
 * file configured in PopartConfig.
 *
 * The cache file starts with an `int` holding the byte length of the config
 * JSON, followed by the config JSON itself, followed by whatever
 * `compileAndExport` writes to the stream. The config is read from a `.json`
 * file sharing the cache file's base name; when that file cannot be opened,
 * the default config is applied and written instead.
 *
 * @return ODLA_SUCCESS once the cache file has been written. ODLA_FAILURE when
 *         the cache path does not end in ".popart", the cache file cannot be
 *         opened or written, no session exists, or compilation throws.
 */
odla_status _odla_computation::compile_and_export() {
  popart::logging::warn("Start compile and export");
  const std::string& cache_file_name =
      PopartConfig::instance()->get_cache_path();
  const std::string file_suffix(".popart");

  // The cache path must end with the ".popart" suffix. Keep the position as
  // size_type so the comparison against npos involves no narrowing.
  const std::string::size_type suffix_pos = cache_file_name.rfind(file_suffix);
  if (suffix_pos == std::string::npos ||
      suffix_pos + file_suffix.size() < cache_file_name.size()) {
    popart::logging::err("Bad cache file name");
    return ODLA_FAILURE;
  }
  const std::string config_file_name =
      cache_file_name.substr(0, suffix_pos) + ".json";

  // Check for a session before the cache file is opened for writing, so a
  // missing session does not touch the file on disk.
  if (!_odla_computation::instance()->session) {
    popart::logging::err("No session available to compile and export");
    return ODLA_FAILURE;
  }

  std::fstream cache_fs(cache_file_name,
                        std::ios_base::out | std::ios_base::binary);
  if (!cache_fs.is_open()) {
    popart::logging::err("Open or create cache file falied");
    return ODLA_FAILURE;
  }

  // Load the config that sits next to the cache file, or fall back to the
  // default config when it cannot be opened.
  std::string config_string;
  {
    std::fstream config_fs(config_file_name,
                           std::ios_base::in | std::ios_base::binary);
    if (!config_fs.is_open()) {
      popart::logging::warn(
          "invalid config file name:[ {} ] will use default config",
          config_file_name);
      PopartConfig::instance()->use_default();
      config_string = PopartConfig::instance()->get_default_config_string();
    } else {
      std::ostringstream config_ss;
      config_ss << config_fs.rdbuf();
      config_string = config_ss.str();
    }
  }

  // Length-prefixed config header; the prefix is an int.
  const int config_size = static_cast<int>(config_string.size());
  cache_fs.write(reinterpret_cast<const char*>(&config_size),
                 sizeof(config_size));
  cache_fs.write(config_string.data(), config_string.size());
  if (!cache_fs) {
    popart::logging::err("Write config to cache file failed");
    return ODLA_FAILURE;
  }

  // Report a compile/export exception as a status code rather than letting it
  // propagate to the caller.
  try {
    _odla_computation::instance()->session->compileAndExport(cache_fs.flush());
  } catch (const std::exception& e) {
    popart::logging::err("Compile and export failed: {}", e.what());
    return ODLA_FAILURE;
  }

  cache_fs.flush();
  cache_fs.close();
  if (cache_fs.fail()) {
    popart::logging::err("Write executable to cache file failed");
    return ODLA_FAILURE;
  }
  return ODLA_SUCCESS;
}
143+
144+
void _odla_computation::init(bool is_compile) {
123145
if (!session) {
124146
std::lock_guard<std::mutex> guard(init_mutex_);
125147
if (!session) {
@@ -167,21 +189,31 @@ void _odla_computation::init() {
167189
auto new_session = popart::InferenceSession::createFromOnnxModel(
168190
proto, data_flow, device, popart::InputShapeInfo(), session_opts_);
169191

170-
if (PopartConfig::instance()->load_cache()) {
171-
popart::logging::info("Load cachefile from existing stream");
172-
auto cache_fs = PopartConfig::instance()->get_cache_fs();
173-
new_session->loadExecutableFromStream(*(cache_fs.get()));
174-
}
175-
new_session->prepareDevice();
176-
new_session->setRandomSeed(0); // Init seed
177-
new_session->weightsFromHost(); // Copy weights from host to IPU
178-
// If in parallel mode, start the thread
179-
ExecutionMode mode = PopartConfig::instance()->execution_mode();
180-
if (PIPELINE == mode || PARALLEL == mode) {
181-
std::thread parallel_thread(compute_loop, this);
182-
thread_state_ = RUNNING;
183-
popart::logging::warn("Parallel loop has been started");
184-
parallel_thread.detach();
192+
if (!is_compile) {
193+
if (PopartConfig::instance()->load_cache()) {
194+
popart::logging::info("Load cachefile from existing stream");
195+
auto cache_fs = PopartConfig::instance()->get_cache_fs();
196+
if (cache_fs->is_open()) {
197+
try {
198+
new_session->loadExecutableFromStream(*(cache_fs.get()));
199+
} catch (std::exception& e) {
200+
popart::logging::err("bad cache file, will compile the graph");
201+
}
202+
}
203+
}
204+
205+
new_session->prepareDevice();
206+
new_session->setRandomSeed(0); // Init seed
207+
new_session->weightsFromHost(); // Copy weights from host to IPU
208+
209+
// If in parallel mode, start the thread
210+
ExecutionMode mode = PopartConfig::instance()->execution_mode();
211+
if (PIPELINE == mode || PARALLEL == mode) {
212+
std::thread parallel_thread(compute_loop, this);
213+
thread_state_ = RUNNING;
214+
popart::logging::warn("Parallel loop has been started");
215+
parallel_thread.detach();
216+
}
185217
}
186218
session =
187219
std::move(new_session); // set session after all initialization done.

ODLA/platforms/odla_popart/odla_popart.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,15 @@ struct _odla_computation {
131131
thread_state_(DONE) {
132132
builder->setAttribute(popart::sVirtualGraphAttribute, 0);
133133
}
134-
void init();
134+
void init(bool is_compile = false);
135135
std::string set_pipeline_stage();
136136
void set_session_opts();
137137
void set_executor();
138138
void set_opts();
139139
bool use_pipeline();
140140
bool hold();
141+
odla_status compile_and_export();
142+
141143
inline Execution* executor() { return executor_; }
142144
inline bool is_done() { return thread_state_ != RUNNING; }
143145
inline void mark_done() {

ODLA/platforms/odla_popart/popart_config.cc

+35-21
Original file line numberDiff line numberDiff line change
@@ -42,18 +42,18 @@ void PopartConfig::use_default() {
4242
queue_capacity_ = 1024 * 1024;
4343
debug_ = false;
4444
default_config_string_ =
45-
"{ \
46-
\"version\":\"1.0.0\", \
47-
\"amp\":0.6, \
48-
\"batches_per_step\":1, \
49-
\"execution_mode\":\"sequence\", \
50-
\"ipu_num\":1, \
51-
\"load_onnx\":false, \
52-
\"load_onnx_path\":\"test-load-time.onnx\", \
53-
\"queue_type\":\"LockFreeQueue\", \
54-
\"queue_capacity\":1048576, \
55-
\"debug\": false \
56-
}";
45+
"{\n\
46+
\"version\":\"1.0.0\",\n\
47+
\"amp\":0.6,\n\
48+
\"batches_per_step\":1,\n\
49+
\"execution_mode\":\"sequence\",\n\
50+
\"ipu_num\":1,\n\
51+
\"load_onnx\":false, \n\
52+
\"load_onnx_path\":\"test-load-time.onnx\",\n\
53+
\"queue_type\":\"LockFreeQueue\",\n\
54+
\"queue_capacity\":1048576,\n\
55+
\"debug\": false\n\
56+
}\n";
5757
}
5858

5959
void PopartConfig::load_config(const char* file_path) {
@@ -206,14 +206,28 @@ bool PopartConfig::get_pipeline_setting(const std::string& node_name,
206206
return false;
207207
}
208208

209-
void PopartConfig::extract_config_from_cache() {
210-
cache_fs =
211-
std::make_shared<std::ifstream>(cache_path_, std::ifstream::binary);
209+
odla_status PopartConfig::extract_config_from_cache() {
210+
cache_fs = std::make_shared<std::fstream>(
211+
cache_path_,
212+
std::ios_base::in | std::ios_base::out | std::ifstream::binary);
212213
int config_len = 0;
213-
cache_fs->read((char*)&config_len, sizeof(config_len));
214-
std::vector<char> config_data_buffer(config_len);
215-
cache_fs->read(config_data_buffer.data(), config_len);
216-
std::string config_string(config_data_buffer.begin(),
217-
config_data_buffer.end());
218-
load_from_string(config_string);
214+
popart::logging::info("load config from cache file: {}", cache_path_.c_str());
215+
if (!cache_fs->is_open()) {
216+
popart::logging::err("cache file is not exist");
217+
return ODLA_FAILURE;
218+
}
219+
if (cache_fs->read((char*)&config_len, sizeof(config_len))) {
220+
std::vector<char> config_data_buffer(config_len);
221+
if (cache_fs->read(config_data_buffer.data(), config_len)) {
222+
std::string config_string(config_data_buffer.begin(),
223+
config_data_buffer.end());
224+
try {
225+
load_from_string(config_string);
226+
} catch (std::exception& e) {
227+
popart::logging::err("load from cached config string failed.");
228+
return ODLA_FAILURE;
229+
}
230+
}
231+
}
232+
return ODLA_SUCCESS;
219233
}

ODLA/platforms/odla_popart/popart_config.h

+8-6
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <string>
2626
#include <vector>
2727

28+
#include "ODLA/odla_common.h"
2829
#include "json.hpp"
2930
/**
3031
* The configuration format like follows:
@@ -82,10 +83,9 @@ class PopartConfig {
8283

8384
bool inited_;
8485

85-
std::shared_ptr<std::ifstream> cache_fs;
86+
std::shared_ptr<std::fstream> cache_fs;
8687

8788
static PopartConfig* instance_;
88-
void use_default();
8989
void load_from_file(const std::string& file_path);
9090

9191
public:
@@ -99,6 +99,8 @@ class PopartConfig {
9999
inited_(false),
100100
ipu_num_(1) {}
101101
~PopartConfig() {}
102+
103+
void use_default();
102104
static PopartConfig* instance() { return instance_; }
103105
const std::string& version() { return version_; }
104106
inline float amp() { return amp_; };
@@ -118,11 +120,11 @@ class PopartConfig {
118120
inline int queue_capacity() { return queue_capacity_; }
119121
inline bool debug() { return debug_; }
120122
inline bool inited() { return inited_; }
121-
inline std::shared_ptr<std::ifstream> get_cache_fs() { return cache_fs; }
122-
inline void set_cache_fs(std::shared_ptr<std::ifstream> fs) { cache_fs = fs; }
123+
inline std::shared_ptr<std::fstream> get_cache_fs() { return cache_fs; }
124+
inline void set_cache_fs(std::shared_ptr<std::fstream> fs) { cache_fs = fs; }
123125

124126
inline bool load_cache() { return load_cache_; }
125-
inline const std::string load_cache_path() { return cache_path_; }
127+
inline const std::string& get_cache_path() { return cache_path_; }
126128
inline void set_load_cache(bool is_load_cache) {
127129
load_cache_ = is_load_cache;
128130
}
@@ -135,7 +137,7 @@ class PopartConfig {
135137
void load_config(const char* file_path);
136138
bool get_pipeline_setting(const std::string& node_name, int64_t& ipu_idx,
137139
int64_t& pipeline_stage);
138-
void extract_config_from_cache();
140+
odla_status extract_config_from_cache();
139141

140142
private:
141143
void set_pipeline_setting(const std::string& name_pattern, int ipu_idx,

0 commit comments

Comments
 (0)