|
// Multi-PE reconverse test for CmiCreateDecrementToEnqueue / CmiDecrementCounter.
// Intended to run with an arbitrary number of PEs.
// The counter is created on PE 0 with initialCount = 4 * CmiNumPes(), so that
// each PE can send 4 messages to PE 0. (The original request was for
// "4 * CmiMyPe()", but that would be zero on PE 0; using 4 * number of PEs
// keeps the test meaningful for any PE count.)
| 7 | + |
| 8 | +#include "converse.h" |
| 9 | +#include <string.h> |
| 10 | + |
| 11 | +// Global pointer to the DecrementToEnqueueMsg created on PE 0. Other PEs do not |
| 12 | +// need direct access to its fields; PE 0 will own and use this pointer when |
| 13 | +// decrementing. Making it global simplifies the broadcast message. |
| 14 | +static DecrementToEnqueueMsg *g_dte = NULL; |
| 15 | +static int g_decInvH = -1; |
| 16 | + |
| 17 | +// Handler called when final message is delivered (counter reached zero). |
| 18 | +void exit_handler(void *msg) { |
| 19 | + CmiPrintf("Exit handler: counter reached zero on PE %d\n", CmiMyPe()); |
| 20 | + CmiFreeDecrementToEnqueue(g_dte); |
| 21 | + CmiExit(0); |
| 22 | +} |
| 23 | + |
| 24 | +// Handler that will be invoked on PE0 for each incoming decrement-invoker |
| 25 | +// message. It calls CmiDecrementCounter on the global DTE. |
| 26 | +void decrement_invoker(void *msg) { |
| 27 | + (void)msg; // incoming message payload is not used |
| 28 | + |
| 29 | + //CmiPrintf("decrement_invoker: PE %d decrementing counter\n", CmiMyPe()); |
| 30 | + |
| 31 | + // Call the decrement operation on the global DTE |
| 32 | + CmiDecrementCounter(g_dte); |
| 33 | + |
| 34 | + // Free the incoming message |
| 35 | + CmiFree(msg); |
| 36 | +} |
| 37 | + |
| 38 | +// Handler invoked on every PE when the broadcast arrives. Each PE will send 4 |
| 39 | +// small messages to PE 0; those messages will trigger decrement_invoker on PE0. |
| 40 | +void broadcast_handler(void *msg) { |
| 41 | + // The broadcast carries no extra payload for this test; simply send 4 |
| 42 | + // messages to PE 0. PE 0 owns the global DTE (g_dte) and will perform the |
| 43 | + // decrements when it receives these messages. |
| 44 | + (void)msg; // unused |
| 45 | + |
| 46 | + int dest = 0; |
| 47 | + |
| 48 | + // Send 4 messages to PE 0. The messages themselves carry no useful payload |
| 49 | + // other than the header and are used only to trigger the decrement handler |
| 50 | + // on PE 0. |
| 51 | + //CmiPrintf("broadcast_handler: PE %d sending 4 decrement messages to PE 0\n", CmiMyPe()); |
| 52 | + for (int i = 0; i < 4; ++i) { |
| 53 | + int sendSize = (int)sizeof(CmiMessageHeader); |
| 54 | + char *smsg = (char *)CmiAlloc(sendSize); |
| 55 | + memset(smsg, 0, sendSize); |
| 56 | + |
| 57 | + // set handler to the pre-registered decrement_invoker |
| 58 | + CmiSetHandler(smsg, g_decInvH); |
| 59 | + CmiMessageHeader *sendHdr = (CmiMessageHeader *)smsg; |
| 60 | + sendHdr->destPE = dest; |
| 61 | + sendHdr->messageSize = sendSize; |
| 62 | + |
| 63 | + // send to PE 0 and free the buffer if the send copies it; use SyncSendAndFree |
| 64 | + CmiSyncSendAndFree(dest, sendSize, smsg); |
| 65 | + } |
| 66 | +} |
| 67 | + |
| 68 | +// Start function: PE 0 creates the DecrementToEnqueueMsg with initialCount = |
| 69 | +// 4 * CmiNumPes(), then broadcasts a message carrying the dte pointer. All |
| 70 | +// PEs will receive the broadcast and send 4 messages to PE 0 which trigger |
| 71 | +// decrements. |
| 72 | +void test_start(int argc, char **argv) { |
| 73 | + (void)argc; (void)argv; |
| 74 | + int exitH = CmiRegisterHandler((CmiHandler)exit_handler); |
| 75 | + int bcastH = CmiRegisterHandler((CmiHandler)broadcast_handler); |
| 76 | + |
| 77 | + // register the decrement invoker once and store its handler globally so |
| 78 | + // broadcast_handler can reuse it when composing outgoing messages. |
| 79 | + g_decInvH = CmiRegisterHandler((CmiHandler)decrement_invoker); |
| 80 | + |
| 81 | + int numPes = CmiNumPes(); |
| 82 | + int initial = 4 * numPes; // 4 messages per PE |
| 83 | + |
| 84 | + if (CmiMyPe() == 0) { |
| 85 | + // create final message that will be sent when counter reaches zero |
| 86 | + int finalSize = (int)sizeof(CmiMessageHeader); |
| 87 | + void *finalMsg = CmiAlloc(finalSize); |
| 88 | + memset(finalMsg, 0, finalSize); |
| 89 | + CmiSetHandler(finalMsg, exitH); |
| 90 | + CmiMessageHeader *fhdr = (CmiMessageHeader *)finalMsg; |
| 91 | + fhdr->destPE = 0; |
| 92 | + fhdr->messageSize = finalSize; |
| 93 | + |
| 94 | + g_dte = CmiCreateDecrementToEnqueue(finalMsg, (unsigned int)initial); |
| 95 | + // Ensure stores to g_dte and its internals are visible to other PEs/threads. |
| 96 | + //CmiMemoryWriteFence(); |
| 97 | + //CmiPrintf("[PE %d] created g_dte=%p, counter=%p (initial=%d)\n", CmiMyPe(), (void*)g_dte, (void*)(g_dte?g_dte->counter:NULL), initial); |
| 98 | + } |
| 99 | + |
| 100 | + // Ensure that PE 0 has finished creating g_dte before anyone reacts to the |
| 101 | + // broadcast. This prevents races where receivers send messages that reach |
| 102 | + // PE 0 before g_dte is initialized, which would cause CmiDecrementCounter to |
| 103 | + // see a null counter. |
| 104 | + CmiNodeAllBarrier(); |
| 105 | + //CmiPrintf("[PE %d] passed node barrier\n", CmiMyPe()); |
| 106 | + |
| 107 | + // Build a small broadcast message. Receivers will consult the global g_dte |
| 108 | + // (which is valid on PE 0) and send messages to PE 0 to trigger decrements. |
| 109 | + int bsize = (int)sizeof(CmiMessageHeader); |
| 110 | + void *bmsg = CmiAlloc(bsize); |
| 111 | + memset(bmsg, 0, bsize); |
| 112 | + CmiMessageHeader *bhdr = (CmiMessageHeader *)bmsg; |
| 113 | + bhdr->messageSize = bsize; |
| 114 | + CmiSetHandler(bmsg, bcastH); |
| 115 | + |
| 116 | + // Broadcast to all PEs |
| 117 | + if (CmiMyPe() == 0) CmiSyncBroadcastAllAndFree(bsize, bmsg); |
| 118 | + |
| 119 | + // Return from start; scheduler will process incoming messages and the exit |
| 120 | + // handler will terminate when the counter reaches zero. |
| 121 | +} |
| 122 | + |
| 123 | +int main(int argc, char **argv) { |
| 124 | + ConverseInit(argc, argv, test_start, 0, 0); |
| 125 | + return 0; |
| 126 | +} |
0 commit comments