Skip to content

Commit 43d6461

Browse files
committed
libdistributed version 0.0.4
Major Changes + Breaking Change -- StopToken has been renamed to TaskManager<RequestType> and now is parameterized by the request type, this allows the user to submit more tasks to the queue after the task queue has begun.
1 parent 33f2d65 commit 43d6461

7 files changed

+197
-60
lines changed

CMakeLists.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
cmake_minimum_required(VERSION 3.12)
2-
project(libdistributed VERSION "0.0.3" LANGUAGES CXX)
2+
project(libdistributed VERSION "0.0.4" LANGUAGES CXX)
33

44
#correct was to set a default build type
55
# https://blog.kitware.com/cmake-and-the-default-build-type/
@@ -26,12 +26,13 @@ include(GNUInstallDirs)
2626

2727
find_package(MPI COMPONENTS CXX)
2828

29+
option(BUILD_SHARED_LIBS "BUILD SHARED LIBRARIES" ON)
2930
add_library(libdistributed
3031
#core features
3132
src/work_queue.cc
3233

3334
#public headers
34-
include/libdistributed_stop_token.h
35+
include/libdistributed_task_manager.h
3536
include/libdistributed_types.h
3637
include/libdistributed_work_queue.h
3738

README.md

+9-8
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,19 @@ int main(int argc, char *argv[])
3838

3939
queue::work_queue(
4040
MPI_COMM_WORLD, std::begin(tasks), std::end(tasks),
41-
[](request req, queue::StopToken& token) {
41+
[](request req, queue::TaskManager<request>& manager) {
4242
//code in this lambda expression gets run once for each task
4343
auto [i] = req;
4444
std::cout << "worker got i=" << i << std::endl;
4545

4646
// if the request is request 0, request termination
4747
// otherwise sleep for 150ms in 50ms increments
4848
if (i != 0) {
49-
for (int j = 0; j < 3 && !token.stop_requested(); ++j) {
49+
for (int j = 0; j < 3 && !manager.stop_requested(); ++j) {
5050
std::this_thread::sleep_for(50ms);
5151
}
5252
} else {
53-
token.request_stop();
53+
manager.request_stop();
5454
}
5555

5656
return std::make_tuple(i, std::pow(i, 2));
@@ -76,12 +76,13 @@ int main(int argc, char *argv[])
7676
7777
## Getting Started
7878
79-
After skimming the example, LibDistributed has a few major types that you will need to use:
79+
After skimming the example, LibDistributed has a few major headers that you will need to use:
8080
81-
Type | Use
82-
-------------------------|----------------------------------------------------------------------
83-
`work_queue.h` | A distributed work queue with cancellation support
84-
`types.h` | Uses templates to create `MPI_Datatype`s
81+
Type | Use
82+
--------------------------------|----------------------------------------------------------------------
83+
`libdistributed_work_queue.h` | A distributed work queue with cancellation support
84+
`libdistributed_task_manager.h` | The manager used to control the work queue
85+
`libdistributed_types.h` | Uses templates to create `MPI_Datatype`s
8586
8687
## Dependencies
8788

include/libdistributed_stop_token.h include/libdistributed_task_manager.h

+11-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
/**
55
* \file
6-
* \brief distributed cancellation request token for `work_queue`
6+
* \brief Manage a running `work_queue`
77
*/
88

99
namespace distributed {
@@ -13,9 +13,10 @@ namespace queue {
1313
* A distributed token which can be used to request stopping computation on a
1414
* work_queue
1515
*/
16-
class StopToken {
16+
template <class RequestType>
17+
class TaskManager {
1718
public:
18-
virtual ~StopToken()=default;
19+
virtual ~TaskManager()=default;
1920
/**
2021
* \returns true if a stop has been requested and recieved by this rank
2122
*/
@@ -29,6 +30,13 @@ class StopToken {
2930
* see if they should attempt to terminate early.
3031
*/
3132
virtual void request_stop()=0;
33+
34+
/**
35+
* Ask the manager to queue a new request
36+
*
37+
* \param[in] request the request that you would like the master to enqueue
38+
*/
39+
virtual void push(RequestType const& request)=0;
3240
};
3341

3442
}

include/libdistributed_work_queue.h

+14-14
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include <mpi.h>
44
#include <utility>
55
#include "libdistributed_types.h"
6-
#include "libdistributed_stop_token.h"
6+
#include "libdistributed_task_manager.h"
77
#include "libdistributed_work_queue_impl.h"
88

99
/**
@@ -25,13 +25,13 @@ namespace queue {
2525
*
2626
* 1. `ResponseType worker_fn(RequestType)`
2727
* 2. `Container<ResponseType> worker_fn(RequestType)`
28-
* 3. `ResponseType worker_fn(RequestType, StopToken&)`
29-
* 4. `Container<ResponseType> worker_fn(RequestType, StopToken&)`
28+
* 3. `ResponseType worker_fn(RequestType, TaskManager<RequestType>&)`
29+
* 4. `Container<ResponseType> worker_fn(RequestType, TaskManager<RequestType>&)`
3030
*
31-
* The versions that take a `StopToken&`, pass a subclass of StopToken which
32-
* allows the caller to request that the remaining tasks in the queue be
33-
* canceled and that other tasks that are currently running be cooperatively
34-
* notified that they can stop.
31+
* The versions that take a `TaskManager<RequestType>&`, pass a subclass of
32+
* TaskManager which allows the caller to request that the remaining tasks in
33+
* the queue be canceled and that other tasks that are currently running be
34+
* cooperatively notified that they can stop.
3535
*
3636
* The versions that return a `Container<ResponseType>` -- a type that
3737
* conforms to the Container named requirement with element type ResponseType.
@@ -40,14 +40,14 @@ namespace queue {
4040
* `master_fn` can have one of two possible signatures:
4141
*
4242
* 1. void master_fn(ResponseType)
43-
* 2. void master_fn(ResponseType, StopToken&)
43+
* 2. void master_fn(ResponseType, TaskManager<RequestType>&)
4444
*
45-
* The version that takes a `StopToken&`, passes a subclass of StopToken which
46-
* allows the caller to request that the remaining tasks in the queue be
47-
* canceled and that other tasks that are currently running be cooperatively
48-
* notified that they can stop.
45+
* The version that takes a `TaskManager<RequestType>&`, passes a subclass of
46+
* `TaskManager<RequestType>&` which allows the caller to request that the
47+
* remaining tasks in the queue be canceled and that other tasks that are
48+
* currently running be cooperatively notified that they can stop.
4949
*
50-
* \see StopToken for details on the semantics about cancellation
50+
* \see TaskManager<RequestType> for details on the semantics about cancellation
5151
*/
5252
template <class TaskForwardIt, class WorkerFunction, class MasterFunction>
5353
void work_queue (
@@ -69,7 +69,7 @@ void work_queue (
6969
impl::maybe_stop_token(
7070
worker_fn,
7171
std::declval<RequestType>(),
72-
std::declval<StopToken&>()
72+
std::declval<TaskManager<RequestType>&>()
7373
)
7474
)>::type;
7575

include/libdistributed_work_queue_impl.h

+88-31
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include <type_traits>
44
#include <mpi.h>
55

6-
#include "libdistributed_stop_token.h"
6+
#include "libdistributed_task_manager.h"
77

88
namespace distributed {
99
namespace queue {
@@ -41,16 +41,21 @@ constexpr int ROOT = 0;
4141
enum class worker_status: int {
4242
done = 1,
4343
more = 2,
44-
cancel = 3
44+
cancel = 3,
45+
new_task = 4
4546
};
4647

4748

48-
class WorkerStopToken : public StopToken
49+
template <class RequestType, class ResponseType>
50+
class WorkerTaskManager : public TaskManager<RequestType>
4951
{
5052
public:
51-
WorkerStopToken(MPI_Comm comm, MPI_Request request)
52-
: comm(comm)
53+
WorkerTaskManager(MPI_Comm comm, MPI_Request request, MPI_Datatype request_dtype, MPI_Datatype response_dtype)
54+
: TaskManager<RequestType>()
55+
, comm(comm)
5356
, stop_request(request)
57+
, request_type(request_dtype)
58+
, response_type(response_dtype)
5459
, flag(0)
5560
{}
5661

@@ -70,20 +75,42 @@ class WorkerStopToken : public StopToken
7075
MPI_Wait(&request, MPI_STATUS_IGNORE);
7176
}
7277

78+
void push(RequestType const& request) override {
79+
ResponseType response;
80+
MPI_Request mpi_request;
81+
//let master know a new task is coming
82+
MPI_Isend(&response, 1, response_type, 0, (int)worker_status::new_task, comm, &mpi_request);
83+
MPI_Wait(&mpi_request, MPI_STATUS_IGNORE);
84+
85+
//send the new request to the master
86+
MPI_Isend(&request, 1, request_type, 0, (int)worker_status::new_task, comm, &mpi_request);
87+
MPI_Wait(&mpi_request, MPI_STATUS_IGNORE);
88+
89+
}
90+
7391
private:
7492
MPI_Comm comm;
7593
MPI_Request stop_request;
94+
MPI_Datatype request_type;
95+
MPI_Datatype response_type;
7696
int flag;
7797
};
7898

79-
class MasterStopToken : public StopToken
99+
template <class RequestType>
100+
class MasterTaskManager : public TaskManager<RequestType>
80101
{
81102
public:
82-
MasterStopToken(MPI_Comm comm)
83-
: StopToken(),
103+
template <class TaskIt>
104+
MasterTaskManager(MPI_Comm comm, TaskIt begin, TaskIt end)
105+
: TaskManager<RequestType>(),
84106
comm(comm),
85107
is_stop_requested(0)
86-
{}
108+
{
109+
while(begin != end) {
110+
requests.emplace(*begin);
111+
++begin;
112+
}
113+
}
87114

88115
bool stop_requested() override {
89116
return is_stop_requested == 1;
@@ -96,9 +123,30 @@ class MasterStopToken : public StopToken
96123
MPI_Wait(&request, MPI_STATUS_IGNORE);
97124
}
98125

126+
void push(RequestType const& request) override {
127+
requests.emplace(request);
128+
}
129+
130+
void pop() {
131+
requests.pop();
132+
}
133+
134+
RequestType const& front() const {
135+
return requests.front();
136+
}
137+
138+
RequestType& front() {
139+
return requests.front();
140+
}
141+
142+
bool empty() const {
143+
return requests.empty();
144+
}
145+
99146
private:
100147
MPI_Comm comm;
101148
int is_stop_requested;
149+
std::queue<RequestType> requests;
102150
};
103151

104152
template <class RequestType, class ResponseType, class TaskForwardIt, class Function>
@@ -112,18 +160,20 @@ void master(MPI_Comm comm, MPI_Datatype request_dtype, MPI_Datatype response_dty
112160
workers.push(i);
113161
}
114162

115-
MasterStopToken stop_token(comm);
163+
//create task queue
164+
165+
MasterTaskManager<RequestType> task_manager(comm, tasks_begin, tasks_end);
116166

117167
int outstanding = 0;
118-
while((tasks_begin != tasks_end and !stop_token.stop_requested()) or outstanding > 0) {
168+
while((!task_manager.empty() and !task_manager.stop_requested()) or outstanding > 0) {
119169

120-
while(tasks_begin != tasks_end and !stop_token.stop_requested() and !workers.empty()) {
170+
while(!task_manager.empty() and !task_manager.stop_requested() and !workers.empty()) {
121171
int worker_id = workers.front();
122172
++outstanding;
123173
workers.pop();
124174

125-
RequestType request = *tasks_begin;
126-
++tasks_begin;
175+
RequestType request = std::move(task_manager.front());
176+
task_manager.pop();
127177

128178
MPI_Request mpi_request;
129179
MPI_Isend(&request, 1, request_dtype, worker_id, (int)worker_status::more, comm, &mpi_request);
@@ -138,19 +188,26 @@ void master(MPI_Comm comm, MPI_Datatype request_dtype, MPI_Datatype response_dty
138188
MPI_Wait(&mpi_response, &response_status);
139189
switch(worker_status(response_status.MPI_TAG)) {
140190
case worker_status::more:
141-
maybe_stop_token(master_fn, response, stop_token);
191+
maybe_stop_token(master_fn, response, task_manager);
142192
break;
143193
case worker_status::done:
144194
workers.push(response_status.MPI_SOURCE);
145195
outstanding--;
146196
break;
147197
case worker_status::cancel:
148-
stop_token.request_stop();
198+
task_manager.request_stop();
199+
break;
200+
case worker_status::new_task:
201+
RequestType request;
202+
MPI_Request mpi_request;
203+
MPI_Irecv(&request, 1, request_dtype, response_status.MPI_SOURCE, (int)worker_status::new_task, comm, &mpi_request);
204+
MPI_Wait(&mpi_request, MPI_STATUS_IGNORE);
205+
task_manager.push(request);
149206
break;
150207
}
151208
}
152209

153-
if(not stop_token.stop_requested()) stop_token.request_stop();
210+
if(not task_manager.stop_requested()) task_manager.request_stop();
154211

155212
while(not workers.empty()) {
156213
int worker_id = workers.front();
@@ -198,38 +255,38 @@ worker_send(MPI_Comm comm, MPI_Datatype response_dtype, ValueType value)
198255
MPI_Wait(&request, MPI_STATUS_IGNORE);
199256
}
200257

201-
template <typename Function, class Message,
258+
template <typename Function, class Message, class RequestType,
202259
typename = void>
203260
struct takes_stop_token : std::false_type
204261
{};
205262

206-
template <typename Function, class Message>
263+
template <typename Function, class Message, class RequestType>
207264
struct takes_stop_token<
208-
Function, Message,
265+
Function, Message, RequestType,
209266
std::void_t<decltype(std::declval<Function>()(
210-
std::declval<Message>(), std::declval<StopToken&>()))>> : std::true_type
267+
std::declval<Message>(), std::declval<TaskManager<RequestType>&>()))>> : std::true_type
211268
{};
212269

213-
template <class Function, class Message, class Enable = void>
270+
template <class Function, class Message, class RequestType, class Enable = void>
214271
struct maybe_stop_token_impl {
215-
static auto call(Function f, Message m, StopToken&) {
272+
static auto call(Function f, Message m, TaskManager<RequestType>&) {
216273
return f(m);
217274
}
218275
};
219276

220277

221-
template <class Function, class Message>
222-
struct maybe_stop_token_impl<Function, Message,
223-
typename std::enable_if_t<takes_stop_token<Function,Message>::value>> {
224-
static auto call(Function f, Message m, StopToken& s) {
278+
template <class Function, class Message, class RequestType>
279+
struct maybe_stop_token_impl<Function, Message, RequestType,
280+
typename std::enable_if_t<takes_stop_token<Function,Message, RequestType>::value>> {
281+
static auto call(Function f, Message m, TaskManager<RequestType>& s) {
225282
return f(m,s);
226283
}
227284
};
228285

229-
template <class Function, class Message>
230-
auto maybe_stop_token(Function f, Message m, StopToken& s)
286+
template <class Function, class Message, class RequestType>
287+
auto maybe_stop_token(Function f, Message m, TaskManager<RequestType>& s)
231288
{
232-
return maybe_stop_token_impl<Function, Message>::call(f, m, s);
289+
return maybe_stop_token_impl<Function, Message, RequestType>::call(f, m, s);
233290
}
234291

235292
template <class RequestType, class ResponseType, class Function>
@@ -241,7 +298,7 @@ void worker(MPI_Comm comm, MPI_Datatype request_dtype, MPI_Datatype response_dty
241298
int done = 0;
242299
MPI_Request stop_request;
243300
MPI_Ibcast(&done, 1, MPI_INT, ROOT, comm, &stop_request);
244-
WorkerStopToken stop_token(comm, stop_request);
301+
WorkerTaskManager<RequestType, ResponseType> stop_token(comm, stop_request, request_dtype, response_dtype);
245302

246303
bool worker_done = false;
247304
while(!worker_done) {

test/simple_queue.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ int main(int argc, char *argv[])
2929

3030
queue::work_queue(
3131
MPI_COMM_WORLD, std::begin(tasks), std::end(tasks),
32-
[](request req, queue::StopToken& token) {
32+
[](request req, queue::TaskManager<request>& token) {
3333
//code in this lambda expression gets run once for each task
3434
auto [i] = req;
3535
std::cout << "worker got i=" << i << std::endl;

0 commit comments

Comments
 (0)