I need to implement non-blocking communication in my application. I am
getting segfault when calling wait() on my irecv requests:
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff7903a69 in
boost::archive::detail::basic_iarchive_impl::load_preamble
(this=0x110b590, ar=..., co=...) at
libs/serialization/src/basic_iarchive.cpp:319
(gdb) bt
#0 0x00007ffff7903a69 in
boost::archive::detail::basic_iarchive_impl::load_preamble
(this=0x110b590, ar=..., co=...) at
libs/serialization/src/basic_iarchive.cpp:319
#1 0x00007ffff7904c52 in
boost::archive::detail::basic_iarchive_impl::load_pointer
(this=0x110b590, ar=..., t=@0x7fffffffd348: 0x2, bpis_ptr=0x0,
bpis_ptr@entry=0x950a50
(first=..., last=...) at /usr/include/boost/mpi/nonblocking.hpp:262 #13 0x000000000044be38 in opice::Communicator::waitall_recv (this=0x9a76c0) at /media/data/prog/workspace/opice/src/communicator.cpp:257 #14 0x0000000000424c14 in opice::Boundary::set_J (this=0xbd2b40) at /media/data/prog/workspace/opice/src/boundary.cpp:423 #15 0x00000000004354a6 in opice::Experiment::run (this=this@entry=0x99d850) at /media/data/prog/workspace/opice/src/experiment.cpp:211 #16 0x0000000000415b02 in main (argc=1, argv=0x7fffffffd708) at /media/data/prog/workspace/opice/src/opice.cpp:41 (gdb)
In the program, parallelization is achieved using domain decomposition.
There is a Boundary class which calls the communication routines, the
boundary has several "BorderComm" objects (two in current version) -
each of them communicates with one neighbor. There is one Communicator
object which holds the data about MPI communicator, handles MPI
initialization, etc.
Relevant parts look somewhat like this:
class Boundary {
public:
void set_J();
private:
BorderComm *sbx_max, *sbx_min;
Communicator* comm_;
};
void Boundary::set_J(){
if(sbx_min) sbx_min->receive_J();
if(sbx_max) sbx_max->receive_J();
if(sbx_max) sbx_max->send_J();
if(sbx_min) sbx_min->send_J();
comm_->waitall_recv();
if(sbx_max) sbx_max->save_J();
if(sbx_min) sbx_min->save_J();
comm_->waitall_send();
}
class BorderComm {
public:
void send_J();
void receive_J();
void save_J();
int neighbor_;
private:
V3Slice *slice_J_border_;
std::vector<double> *recv_array_;
Communicator* comm_;
};
void BorderComm::send_J() {
comm_->send_fields(slice_J_border_, TYPE_J, neighbor_);
}
void BorderComm::receive_J(){
recv_array_ = new std::vector<double>(LEN);
comm_->receive_fields(recv_array_, TYPE_J, neighbor_);
}
void BorderComm::save_J(){
.... put contents of recv_array_ into some V3Slice ....
delete(recv_array_);
}
class Communicator {
public:
void send_fields(V3Slice* slice, PackType pack_type, int recnum);
void receive_fields(std::vector<double>* recv_array, PackType
pack_type, int sendnum);
void waitall_send();
void waitall_recv();
protected:
boost::mpi::communicator *world_;
std::vectorboost::mpi::request send_requests_;
std::vectorboost::mpi::request recv_requests_;
};
void Communicator::send_fields(V3Slice* slice, PackType pack_type, int
recnum) {
std::vector<double> send_array(LEN);
..... fill send_array from V3Slice object .....
send_requests_.push_back(world_->isend(recnum, pack_type, send_array));
}
void Communicator::receive_fields(std::vector<double>* recv_array,
PackType pack_type, int sendnum) {
recv_requests_.push_back(world_->irecv(sendnum, pack_type, recv_array));
}
void Communicator::waitall_send(){
boost::mpi::wait_all(send_requests_.begin(), send_requests_.end());
send_requests_.clear();
}
void Communicator::waitall_recv(){
boost::mpi::wait_all(recv_requests_.begin(), recv_requests_.end());
recv_requests_.clear();
}
Now if I move to frame #13 in the backtrace - to get inside the
Communicator class, and look at the requests, I get:
(gdb) frame 13
#13 0x000000000044be38 in opice::Communicator::waitall_recv
(this=0x9a76c0) at /media/data/prog/workspace/opice/src/communicator.cpp:257
(gdb) print recv_requests_
$1 = std::vector of length 2, capacity 2 = {{m_requests = {0x94f0c0