
Hi, I'm having a problem with nonblocking p2p communications. This is with boost 1.47 and openmpi-1.5.3 compiled with icpc 12.0.3. Here's an example that exhibits the problem: #include <boost/mpi.hpp> #include <iostream> #include <vector> using namespace boost::mpi; using namespace std; int main(int argc, char* argv[]) { environment env(argc, argv); communicator world; vector<request> handshake_reqs_; for(int i=0; i<world.size(); ++i) { if(i!=world.rank()) { printf("Task %d posting recv from %d\n",world.rank(), i); handshake_reqs_.push_back(world.irecv(i, 13)); printf("Task %d sending to %d\n",world.rank(), i); world.isend(i, 13); } } for(int i=0; i<handshake_reqs_.size(); ++i) { boost::optional<status> s=handshake_reqs_[i].test(); if(s.is_initialized()) { const int source_task = s.get().source(); const int tag = s.get().tag(); printf("Task %d received message tag %d from task %d\n",world.rank(), tag, source_task);cout.flush(); } } } So, essentially, each task sends a message with tag 13 to every other task, each of which has posted a nonblocking receive for such a message. 
The output from this program is something like: [pjonsson@sunrise03 ~]$ mpirun -np 3 ./a.out Task 0 posting recv from 1 Task 0 sending to 1 Task 1 posting recv from 0 Task 1 sending to 0 Task 1 posting recv from 2 Task 0 posting recv from 2 Task 0 sending to 2 Task 1 sending to 2 Task 1 received message tag 0 from task 0 [sunrise03:06504] *** Process received signal *** [sunrise03:06504] Signal: Segmentation fault (11) [sunrise03:06504] Signal code: Address not mapped (1) [sunrise03:06504] Failing at address: 0x100000037 Task 2 posting recv from 0 Task 2 sending to 0 Task 2 posting recv from 1 Task 0 received message tag 1 from task 0 [sunrise03:06504] [ 0] /lib64/libpthread.so.0 [0x3d6c20eb10] [sunrise03:06504] [ 1] /n/home00/pjonsson/lib/libboost_mpi.so.1.47.0(_ZN5boost3mpi7request4testEv+0xc) [0x2b2fe9863a3c] [sunrise03:06504] [ 2] ./a.out(main+0x35d) [0x40ab37] [sunrise03:06504] [ 3] /lib64/libc.so.6(__libc_start_main+0xf4) [0x3d6b61d994] [sunrise03:06504] [ 4] ./a.out(_ZNSt8ios_base4InitD1Ev+0x41) [0x407039] [sunrise03:06504] *** End of error message *** [sunrise03:06503] *** Process received signal *** [sunrise03:06503] Signal: Segmentation fault (11) [sunrise03:06503] Signal code: Address not mapped (1) [sunrise03:06503] Failing at address: 0x100000037 [sunrise03:06503] [ 0] /lib64/libpthread.so.0 [0x3d6c20eb10] [sunrise03:06503] [ 1] /n/home00/pjonsson/lib/libboost_mpi.so.1.47.0(_ZN5boost3mpi7request4testEv+0xc) [0x2b44a0822a3c] [sunrise03:06503] [ 2] ./a.out(main+0x35d) [0x40ab37] [sunrise03:06503] [ 3] /lib64/libc.so.6(__libc_start_main+0xf4) [0x3d6b61d994] [sunrise03:06503] [ 4] ./a.out(_ZNSt8ios_base4InitD1Ev+0x41) [0x407039] [sunrise03:06503] *** End of error message *** [sunrise03:06505] *** Process received signal *** [sunrise03:06505] Signal: Segmentation fault (11) [sunrise03:06505] Signal code: Address not mapped (1) [sunrise03:06505] Failing at address: 0x100000037 Task 2 sending to 1 Task 2 received message tag 0 from task 0 [sunrise03:06505] [ 
0] /lib64/libpthread.so.0 [0x3d6c20eb10] [sunrise03:06505] [ 1] /n/home00/pjonsson/lib/libboost_mpi.so.1.47.0(_ZN5boost3mpi7request4testEv+0xc) [0x2b7c652b4a3c] [sunrise03:06505] [ 2] ./a.out(main+0x35d) [0x40ab37] [sunrise03:06505] [ 3] /lib64/libc.so.6(__libc_start_main+0xf4) [0x3d6b61d994] [sunrise03:06505] [ 4] ./a.out(_ZNSt8ios_base4InitD1Ev+0x41) [0x407039] [sunrise03:06505] *** End of error message *** Sometimes it doesn't crash but just returns garbage for the source/tag: for example, every message reports source 0 and a tag value equal to the source task. Using test_some gives similar results. If I write the same program using the C API, it works correctly: int main(int argc, char* argv[]) { MPI_Init(&argc, &argv); int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); vector<MPI_Request> handshake_reqs_; for(int i=0; i<size; ++i) { if(i!=rank) { handshake_reqs_.push_back(MPI_Request()); MPI_Irecv(0,0, MPI_INT, i, 13, MPI_COMM_WORLD, &handshake_reqs_.back()); printf("Task %d sending to %d\n",rank, i); MPI_Request r; MPI_Isend(0,0, MPI_INT, i, 13, MPI_COMM_WORLD, &r); } } while(true) { for(int i=0; i<handshake_reqs_.size(); ++i) { int complete; MPI_Status s; MPI_Test(&handshake_reqs_[i], &complete, &s); if(complete) { const int source_task = s.MPI_SOURCE; const int tag = s.MPI_TAG; printf("Task %d received message tag %d from task %d\n",rank, tag, source_task);cout.flush(); MPI_Irecv(0,0, MPI_INT, i, 13, MPI_COMM_WORLD, &handshake_reqs_[i]); } } } } This version returns: [pjonsson@sunrise03 ~]$ mpirun -np 3 ./a.out Task 2 sending to 0 Task 2 sending to 1 Task 0 sending to 1 Task 0 sending to 2 Task 0 received message tag 13 from task 2 Task 2 received message tag 13 from task 0 Task 1 sending to 0 Task 0 received message tag 13 from task 1 Task 1 sending to 2 Task 2 received message tag 13 from task 1 Task 1 received message tag 13 from task 0 Task 1 received message tag 13 from task 2 Any ideas what might be going wrong? Regards, /Patrik