SST/macro
|
Classes | |
class | parsedumpi |
A refactored dumpi parser to read the newer binary-format dumpi trace files. More... | |
class | parsedumpi_callbacks |
Populate C-style callbacks for a libundumpi parser. More... | |
class | sumi_transport |
Functions | |
void | comm_init () |
void | comm_finalize () |
int | comm_rank () |
int | comm_nproc () |
void | comm_send_header (int dst, const message::ptr &msg) |
void | comm_cancel_ping (int dst, int tag) |
void | comm_ping (int dst, int tag, timeout_function *func) |
void | comm_send_payload (int dst, const message::ptr &msg) |
void | comm_send (int dst, message::payload_type_t ev, const message::ptr &msg) |
void | comm_rdma_put (int dst, const message::ptr &msg) |
void | comm_rdma_get (int dst, const message::ptr &msg) |
void | comm_nvram_get (int dst, const message::ptr &msg) |
void | comm_alltoall (void *dst, void *src, int nelems, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
void | comm_allgather (void *dst, void *src, int nelems, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
void | comm_allgatherv (void *dst, void *src, int *recv_counts, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
void | comm_gather (int root, void *dst, void *src, int nelems, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
void | comm_scatter (int root, void *dst, void *src, int nelems, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
void | comm_bcast (int root, void *buffer, int nelems, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
void | comm_allreduce (void *dst, void *src, int nelems, int type_size, int tag, reduce_fxn fxn, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
The total size of the input/result buffer in bytes is nelems*type_size. More... | |
template<typename data_t , template< typename > class Op> | |
void | comm_allreduce (void *dst, void *src, int nelems, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
void | comm_reduce (int root, void *dst, void *src, int nelems, int type_size, int tag, reduce_fxn fxn, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
template<typename data_t , template< typename > class Op> | |
void | comm_reduce (int root, void *dst, void *src, int nelems, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
void | comm_barrier (int tag, bool fault_aware=false, communicator *dom=0) |
void | comm_vote (int vote, int tag, vote_fxn fxn, int context=options::initial_context, communicator *dom=0) |
The total size of the input/result buffer in bytes is nelems*type_size. This always runs in a fault-tolerant fashion. This uses a dynamic tree structure that reconnects partners when failures are detected. More... | |
template<template< class > class VoteOp> | |
void | comm_vote (int vote, int tag, int context=options::initial_context, communicator *dom=0) |
void | comm_kill_node () |
Helper function. More... | |
void | comm_kill_process () |
Helper function. More... | |
const thread_safe_set< int > & | comm_failed_ranks () |
const thread_safe_set< int > & | comm_failed_ranks (int context) |
void | comm_start_heartbeat (double interval) |
void | comm_stop_heartbeat () |
collective_done_message::ptr | comm_collective_block (collective::type_t ty, int tag) |
message::ptr | comm_poll () |
void | compute (double sec) |
void | sleep (double sec) |
void | sleep_until (double sec) |
int | comm_partner (long node_id) |
Map a physical node location to its virtual assignment in the communicator. More... | |
double | wall_time () |
Every node has exactly the same notion of time - universal, global clock. More... | |
transport * | sumi_api () |
void sumi::comm_allgather | ( | void * | dst, |
void * | src, | ||
int | nelems, | ||
int | type_size, | ||
int | tag, | ||
bool | fault_aware = false , |
||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
void sumi::comm_allgatherv | ( | void * | dst, |
void * | src, | ||
int * | recv_counts, | ||
int | type_size, | ||
int | tag, | ||
bool | fault_aware = false , |
||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
void sumi::comm_allreduce | ( | void * | dst, |
void * | src, | ||
int | nelems, | ||
int | type_size, | ||
int | tag, | ||
reduce_fxn | fxn, | ||
bool | fault_aware = false , |
||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
The total size of the input/result buffer in bytes is nelems*type_size.
dst | Buffer for the result. Can be NULL to ignore payloads. |
src | Buffer for the input. Can be NULL to ignore payloads. |
nelems | The number of elements in the input and result buffer. |
type_size | The size of the input type, e.g. sizeof(int), sizeof(double) |
tag | A unique tag identifier for the collective |
fxn | The function that will actually perform the reduction |
fault_aware | Whether to execute in a fault-aware fashion to detect failures |
context | The context (i.e. initial set of failed procs) |
Referenced by comm_allreduce().
void sumi::comm_allreduce | ( | void * | dst, |
void * | src, | ||
int | nelems, | ||
int | tag, | ||
bool | fault_aware = false , |
||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
Definition at line 103 of file sumi.h.
References comm_allreduce(), and comm_reduce().
void sumi::comm_alltoall | ( | void * | dst, |
void * | src, | ||
int | nelems, | ||
int | type_size, | ||
int | tag, | ||
bool | fault_aware = false , |
||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
void sumi::comm_barrier | ( | int | tag, |
bool | fault_aware = false , |
||
communicator * | dom = 0 |
||
) |
Referenced by comm_reduce().
void sumi::comm_bcast | ( | int | root, |
void * | buffer, | ||
int | nelems, | ||
int | type_size, | ||
int | tag, | ||
bool | fault_aware = false , |
||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
void sumi::comm_cancel_ping | ( | int | dst, |
int | tag | ||
) |
collective_done_message::ptr sumi::comm_collective_block | ( | collective::type_t | ty, |
int | tag | ||
) |
Referenced by comm_vote().
const thread_safe_set<int>& sumi::comm_failed_ranks | ( | ) |
Referenced by comm_vote().
const thread_safe_set<int>& sumi::comm_failed_ranks | ( | int | context | ) |
void sumi::comm_finalize | ( | ) |
void sumi::comm_gather | ( | int | root, |
void * | dst, | ||
void * | src, | ||
int | nelems, | ||
int | type_size, | ||
int | tag, | ||
bool | fault_aware = false , |
||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
void sumi::comm_init | ( | ) |
void sumi::comm_kill_node | ( | ) |
Helper function.
Kill the node that is currently running. This is invoked by an application. This allows an application to die at a very, very specific point in application execution.
Referenced by comm_vote().
void sumi::comm_kill_process | ( | ) |
Helper function.
Kill the process that is currently running. This only kills the process - it leaves the node alive and well.
Referenced by comm_vote().
int sumi::comm_nproc | ( | ) |
void sumi::comm_nvram_get | ( | int | dst, |
const message::ptr & | msg | ||
) |
int sumi::comm_partner | ( | long | node_id | ) |
Map a physical node location to its virtual assignment in the communicator.
node_id |
Referenced by comm_vote().
void sumi::comm_ping | ( | int | dst, |
int | tag, | ||
timeout_function * | func | ||
) |
message::ptr sumi::comm_poll | ( | ) |
Referenced by comm_vote().
int sumi::comm_rank | ( | ) |
void sumi::comm_rdma_get | ( | int | dst, |
const message::ptr & | msg | ||
) |
void sumi::comm_rdma_put | ( | int | dst, |
const message::ptr & | msg | ||
) |
void sumi::comm_reduce | ( | int | root, |
void * | dst, | ||
void * | src, | ||
int | nelems, | ||
int | type_size, | ||
int | tag, | ||
reduce_fxn | fxn, | ||
bool | fault_aware = false , |
||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
Referenced by comm_allreduce(), and comm_reduce().
void sumi::comm_reduce | ( | int | root, |
void * | dst, | ||
void * | src, | ||
int | nelems, | ||
int | tag, | ||
bool | fault_aware = false , |
||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
Definition at line 115 of file sumi.h.
References comm_barrier(), comm_reduce(), and comm_vote().
void sumi::comm_scatter | ( | int | root, |
void * | dst, | ||
void * | src, | ||
int | nelems, | ||
int | type_size, | ||
int | tag, | ||
bool | fault_aware = false , |
||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
void sumi::comm_send | ( | int | dst, |
message::payload_type_t | ev, | ||
const message::ptr & | msg | ||
) |
void sumi::comm_send_header | ( | int | dst, |
const message::ptr & | msg | ||
) |
dst | The destination to send to |
void sumi::comm_send_payload | ( | int | dst, |
const message::ptr & | msg | ||
) |
void sumi::comm_start_heartbeat | ( | double | interval | ) |
Referenced by comm_vote().
void sumi::comm_stop_heartbeat | ( | ) |
Referenced by comm_vote().
void sumi::comm_vote | ( | int | vote, |
int | tag, | ||
vote_fxn | fxn, | ||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
The total size of the input/result buffer in bytes is nelems*type_size. This always runs in a fault-tolerant fashion. This uses a dynamic tree structure that reconnects partners when failures are detected.
vote | The vote (currently restricted to integer) from this process |
nelems | The number of elements in the input and result buffer. |
tag | A unique tag identifier for the collective |
fxn | The function that merges votes, usually AND, OR, MAX, MIN |
context | The context (i.e. initial set of failed procs) |
Referenced by comm_reduce(), and comm_vote().
void sumi::comm_vote | ( | int | vote, |
int | tag, | ||
int | context = options::initial_context , |
||
communicator * | dom = 0 |
||
) |
Definition at line 138 of file sumi.h.
References comm_collective_block(), comm_failed_ranks(), comm_kill_node(), comm_kill_process(), comm_partner(), comm_poll(), comm_start_heartbeat(), comm_stop_heartbeat(), comm_vote(), compute(), sleep(), sleep_until(), sumi_api(), and wall_time().
void sumi::compute | ( | double | sec | ) |
Referenced by comm_vote(), and sstmac::sw::app::parent_app().
void sumi::sleep | ( | double | sec | ) |
Referenced by comm_vote(), and sstmac::sw::app::parent_app().
void sumi::sleep_until | ( | double | sec | ) |
Referenced by comm_vote().
transport* sumi::sumi_api | ( | ) |
Referenced by comm_vote().
double sumi::wall_time | ( | ) |
Every node has exactly the same notion of time - universal, global clock.
Thus, if rank 0 starts and rank 1 starts 10 minutes later, then even though rank 1 has only been running for 30 seconds, the reported time will still be 10 minutes, 30 seconds.
Referenced by comm_vote().