|
SST/macro
|
Classes | |
| class | parsedumpi |
| A refactored dumpi parser to read the newer binary-format dumpi trace files. More... | |
| class | parsedumpi_callbacks |
| Populate C-style callbacks for a libundumpi parser. More... | |
| class | sumi_transport |
Functions | |
| void | comm_init () |
| void | comm_finalize () |
| int | comm_rank () |
| int | comm_nproc () |
| void | comm_send_header (int dst, const message::ptr &msg) |
| void | comm_cancel_ping (int dst, int tag) |
| void | comm_ping (int dst, int tag, timeout_function *func) |
| void | comm_send_payload (int dst, const message::ptr &msg) |
| void | comm_send (int dst, message::payload_type_t ev, const message::ptr &msg) |
| void | comm_rdma_put (int dst, const message::ptr &msg) |
| void | comm_rdma_get (int dst, const message::ptr &msg) |
| void | comm_nvram_get (int dst, const message::ptr &msg) |
| void | comm_alltoall (void *dst, void *src, int nelems, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
| void | comm_allgather (void *dst, void *src, int nelems, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
| void | comm_allgatherv (void *dst, void *src, int *recv_counts, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
| void | comm_gather (int root, void *dst, void *src, int nelems, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
| void | comm_scatter (int root, void *dst, void *src, int nelems, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
| void | comm_bcast (int root, void *buffer, int nelems, int type_size, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
| void | comm_allreduce (void *dst, void *src, int nelems, int type_size, int tag, reduce_fxn fxn, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
| The total size of the input/result buffer in bytes is nelems*type_size. More... | |
| template<typename data_t , template< typename > class Op> | |
| void | comm_allreduce (void *dst, void *src, int nelems, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
| void | comm_reduce (int root, void *dst, void *src, int nelems, int type_size, int tag, reduce_fxn fxn, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
| template<typename data_t , template< typename > class Op> | |
| void | comm_reduce (int root, void *dst, void *src, int nelems, int tag, bool fault_aware=false, int context=options::initial_context, communicator *dom=0) |
| void | comm_barrier (int tag, bool fault_aware=false, communicator *dom=0) |
| void | comm_vote (int vote, int tag, vote_fxn fxn, int context=options::initial_context, communicator *dom=0) |
| The total size of the input/result buffer in bytes is nelems*type_size. This always runs in a fault-tolerant fashion. This uses a dynamic tree structure that reconnects partners when failures are detected. More... | |
| template<template< class > class VoteOp> | |
| void | comm_vote (int vote, int tag, int context=options::initial_context, communicator *dom=0) |
| void | comm_kill_node () |
| Helper function. More... | |
| void | comm_kill_process () |
| Helper function. More... | |
| const thread_safe_set< int > & | comm_failed_ranks () |
| const thread_safe_set< int > & | comm_failed_ranks (int context) |
| void | comm_start_heartbeat (double interval) |
| void | comm_stop_heartbeat () |
| collective_done_message::ptr | comm_collective_block (collective::type_t ty, int tag) |
| message::ptr | comm_poll () |
| void | compute (double sec) |
| void | sleep (double sec) |
| void | sleep_until (double sec) |
| int | comm_partner (long node_id) |
| Map a physical node location to its virtual assignment in the communicator. More... | |
| double | wall_time () |
| Every node has exactly the same notion of time - universal, global clock. More... | |
| transport * | sumi_api () |
| void sumi::comm_allgather | ( | void * | dst, |
| void * | src, | ||
| int | nelems, | ||
| int | type_size, | ||
| int | tag, | ||
| bool | fault_aware = false, |
||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
| void sumi::comm_allgatherv | ( | void * | dst, |
| void * | src, | ||
| int * | recv_counts, | ||
| int | type_size, | ||
| int | tag, | ||
| bool | fault_aware = false, |
||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
| void sumi::comm_allreduce | ( | void * | dst, |
| void * | src, | ||
| int | nelems, | ||
| int | type_size, | ||
| int | tag, | ||
| reduce_fxn | fxn, | ||
| bool | fault_aware = false, |
||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
The total size of the input/result buffer in bytes is nelems*type_size.
| dst | Buffer for the result. Can be NULL to ignore payloads. |
| src | Buffer for the input. Can be NULL to ignore payloads. |
| nelems | The number of elements in the input and result buffer. |
| type_size | The size of the input type, e.g. sizeof(int), sizeof(double) |
| tag | A unique tag identifier for the collective |
| fxn | The function that will actually perform the reduction |
| fault_aware | Whether to execute in a fault-aware fashion to detect failures |
| context | The context (i.e. initial set of failed procs) |
Referenced by comm_allreduce().
| void sumi::comm_allreduce | ( | void * | dst, |
| void * | src, | ||
| int | nelems, | ||
| int | tag, | ||
| bool | fault_aware = false, |
||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
Definition at line 103 of file sumi.h.
References comm_allreduce(), and comm_reduce().

| void sumi::comm_alltoall | ( | void * | dst, |
| void * | src, | ||
| int | nelems, | ||
| int | type_size, | ||
| int | tag, | ||
| bool | fault_aware = false, |
||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
| void sumi::comm_barrier | ( | int | tag, |
| bool | fault_aware = false, |
||
| communicator * | dom = 0 |
||
| ) |
Referenced by comm_reduce().
| void sumi::comm_bcast | ( | int | root, |
| void * | buffer, | ||
| int | nelems, | ||
| int | type_size, | ||
| int | tag, | ||
| bool | fault_aware = false, |
||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
| void sumi::comm_cancel_ping | ( | int | dst, |
| int | tag | ||
| ) |
| collective_done_message::ptr sumi::comm_collective_block | ( | collective::type_t | ty, |
| int | tag | ||
| ) |
Referenced by comm_vote().
| const thread_safe_set<int>& sumi::comm_failed_ranks | ( | ) |
Referenced by comm_vote().
| const thread_safe_set<int>& sumi::comm_failed_ranks | ( | int | context | ) |
| void sumi::comm_finalize | ( | ) |
| void sumi::comm_gather | ( | int | root, |
| void * | dst, | ||
| void * | src, | ||
| int | nelems, | ||
| int | type_size, | ||
| int | tag, | ||
| bool | fault_aware = false, |
||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
| void sumi::comm_init | ( | ) |
| void sumi::comm_kill_node | ( | ) |
Helper function.
Kill the node that is currently running. This is invoked by an application. This allows an application to die at a very, very specific point in application execution.
Referenced by comm_vote().
| void sumi::comm_kill_process | ( | ) |
Helper function.
Kill the process that is currently running. This only kills the process - it leaves the node alive and well.
Referenced by comm_vote().
| int sumi::comm_nproc | ( | ) |
| void sumi::comm_nvram_get | ( | int | dst, |
| const message::ptr & | msg | ||
| ) |
| int sumi::comm_partner | ( | long | node_id | ) |
Map a physical node location to its virtual assignment in the communicator.
| node_id |
Referenced by comm_vote().
| void sumi::comm_ping | ( | int | dst, |
| int | tag, | ||
| timeout_function * | func | ||
| ) |
| message::ptr sumi::comm_poll | ( | ) |
Referenced by comm_vote().
| int sumi::comm_rank | ( | ) |
| void sumi::comm_rdma_get | ( | int | dst, |
| const message::ptr & | msg | ||
| ) |
| void sumi::comm_rdma_put | ( | int | dst, |
| const message::ptr & | msg | ||
| ) |
| void sumi::comm_reduce | ( | int | root, |
| void * | dst, | ||
| void * | src, | ||
| int | nelems, | ||
| int | type_size, | ||
| int | tag, | ||
| reduce_fxn | fxn, | ||
| bool | fault_aware = false, |
||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
Referenced by comm_allreduce(), and comm_reduce().
| void sumi::comm_reduce | ( | int | root, |
| void * | dst, | ||
| void * | src, | ||
| int | nelems, | ||
| int | tag, | ||
| bool | fault_aware = false, |
||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
Definition at line 115 of file sumi.h.
References comm_barrier(), comm_reduce(), and comm_vote().

| void sumi::comm_scatter | ( | int | root, |
| void * | dst, | ||
| void * | src, | ||
| int | nelems, | ||
| int | type_size, | ||
| int | tag, | ||
| bool | fault_aware = false, |
||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
| void sumi::comm_send | ( | int | dst, |
| message::payload_type_t | ev, | ||
| const message::ptr & | msg | ||
| ) |
| void sumi::comm_send_header | ( | int | dst, |
| const message::ptr & | msg | ||
| ) |
| dst | The destination to send to |
| void sumi::comm_send_payload | ( | int | dst, |
| const message::ptr & | msg | ||
| ) |
| void sumi::comm_start_heartbeat | ( | double | interval | ) |
Referenced by comm_vote().
| void sumi::comm_stop_heartbeat | ( | ) |
Referenced by comm_vote().
| void sumi::comm_vote | ( | int | vote, |
| int | tag, | ||
| vote_fxn | fxn, | ||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
The total size of the input/result buffer in bytes is nelems*type_size. This always runs in a fault-tolerant fashion. This uses a dynamic tree structure that reconnects partners when failures are detected.
| vote | The vote (currently restricted to integer) from this process |
| nelems | The number of elements in the input and result buffer. |
| tag | A unique tag identifier for the collective |
| fxn | The function that merges vote, usually AND, OR, MAX, MIN |
| context | The context (i.e. initial set of failed procs) |
Referenced by comm_reduce(), and comm_vote().
| void sumi::comm_vote | ( | int | vote, |
| int | tag, | ||
| int | context = options::initial_context, |
||
| communicator * | dom = 0 |
||
| ) |
Definition at line 138 of file sumi.h.
References comm_collective_block(), comm_failed_ranks(), comm_kill_node(), comm_kill_process(), comm_partner(), comm_poll(), comm_start_heartbeat(), comm_stop_heartbeat(), comm_vote(), compute(), sleep(), sleep_until(), sumi_api(), and wall_time().

| void sumi::compute | ( | double | sec | ) |
Referenced by comm_vote(), and sstmac::sw::app::parent_app().
| void sumi::sleep | ( | double | sec | ) |
Referenced by comm_vote(), and sstmac::sw::app::parent_app().
| void sumi::sleep_until | ( | double | sec | ) |
Referenced by comm_vote().
| transport* sumi::sumi_api | ( | ) |
Referenced by comm_vote().
| double sumi::wall_time | ( | ) |
Every node has exactly the same notion of time - universal, global clock.
Thus, if rank 0 starts and 10 minutes later rank 1 starts, even though rank 1 has only been running for 30 seconds, the time returned will still be 10 minutes, 30 seconds.
Referenced by comm_vote().
1.8.11