|
void | SetParam (const char *name, const char *val) override |
| set parameters to the engine
|
|
void | Allreduce (void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer, PreprocFunction prepare_fun, void *prepare_arg) override |
| perform in-place allreduce on sendrecvbuf; this function is NOT thread-safe
|
|
void | Allgather (void *sendrecvbuf, size_t total_size, size_t slice_begin, size_t slice_end, size_t size_prev_slice) override |
| internal Allgather function: each node has a segment of data in the ring of sendrecvbuf; the data provided by the current node k is [slice_begin, slice_end), and the next node's segment must start with slice_end. After the call of Allgather, sendrecvbuf_ contains all the contents, including all segments. Uses a ring-based algorithm.
|
|
void | Broadcast (void *sendrecvbuf_, size_t total_size, int root) override |
| broadcast data from root to all nodes
|
|
int | LoadCheckPoint () override |
| deprecated
|
|
void | CheckPoint () override |
| Increase internal version number. Deprecated.
|
|
virtual bool | Init (int argc, char *argv[]) |
|
virtual bool | Shutdown () |
|
void | TrackerPrint (const std::string &msg) override |
| print the msg in the tracker, this function can be used to communicate the information of the progress to the user who monitors the tracker
|
|
int | GetRingPrevRank () const override |
| get rank of previous node in ring topology
|
|
int | GetRank () const override |
| get rank
|
|
int | GetWorldSize () const override |
| get the total number of nodes (world size)
|
|
bool | IsDistributed () const override |
| whether is distributed or not
|
|
std::string | GetHost () const override |
| get the host of the current node
|
|
int | VersionNumber () const override |
|
void | ReportStatus () const |
| report current status to the job tracker depending on the job tracker we are in
|
|
| ~IEngine ()=default |
| virtual destructor
|
|
|
typedef void() | PreprocFunction(void *arg) |
| Preprocessing function, that is called before AllReduce, used to prepare the data used by AllReduce.
|
|
typedef void() | ReduceFunction(const void *src, void *dst, int count, const MPI::Datatype &dtype) |
| reduce function; the same form as the MPI reduce function is used, to be compatible with the MPI interface. In all the functions, the memory is ensured to be aligned to 64 bits, which means it is OK to cast src and dst to double*, int*, etc.
|
|
static const int | kMagic = 0xff99 |
|
enum | ReturnTypeEnum {
kSuccess
, kConnReset
, kRecvZeroLen
, kSockError
,
kGetExcept
} |
| enumeration of possible returning results from Try functions More...
|
|
xgboost::collective::TCPSocket | ConnectTracker () const |
| initialize connection to the tracker
|
|
bool | ReConnectLinks (const char *cmd="start") |
| connect to the tracker to fix the missing links; this function is also used when the engine starts up
|
|
ReturnType | TryAllreduce (void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) |
| perform in-place allreduce, on sendrecvbuf, this function can fail, and will return the cause of failure
|
|
ReturnType | TryBroadcast (void *sendrecvbuf_, size_t size, int root) |
| broadcast data from root to all nodes; this function can fail, and will return the cause of failure
|
|
ReturnType | TryAllreduceTree (void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) |
| perform in-place allreduce, on sendrecvbuf, this function implements tree-shape reduction
|
|
ReturnType | TryAllgatherRing (void *sendrecvbuf_, size_t total_size, size_t slice_begin, size_t slice_end, size_t size_prev_slice) |
| internal Allgather function: each node has a segment of data in the ring of sendrecvbuf; the data provided by the current node k is [slice_begin, slice_end), and the next node's segment must start with slice_end. After the call of Allgather, sendrecvbuf_ contains all the contents, including all segments. Uses a ring-based algorithm.
|
|
ReturnType | TryReduceScatterRing (void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) |
| perform the in-place reduce-scatter step of allreduce, reducing on the sendrecvbuf
|
|
ReturnType | TryAllreduceRing (void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) |
| perform in-place allreduce on sendrecvbuf, using a ring-based algorithm: reduce-scatter + allgather
|
|
ReturnType | ReportError (LinkRecord *link, ReturnType err) |
| function used to report error when a link goes wrong
|
|
static ReturnType | Errno2Return () |
| translate errno to return type
|
|
void rabit::engine::AllreduceMock::Allgather |
( |
void * |
sendrecvbuf_, |
|
|
size_t |
total_size, |
|
|
size_t |
slice_begin, |
|
|
size_t |
slice_end, |
|
|
size_t |
size_prev_slice |
|
) |
| |
|
inline override virtual
internal Allgather function: each node has a segment of data in the ring of sendrecvbuf; the data provided by the current node k is [slice_begin, slice_end), and the next node's segment must start with slice_end. After the call of Allgather, sendrecvbuf_ contains all the contents, including all segments. Uses a ring-based algorithm.
- Parameters
-
sendrecvbuf_ | buffer for both sending and receiving data, it is a ring conceptually |
total_size | total size of data to be gathered |
slice_begin | beginning of the current slice |
slice_end | end of the current slice |
size_prev_slice | size of the previous slice i.e. slice of node (rank - 1) % world_size |
Reimplemented from rabit::engine::AllreduceBase.