[mpi] - why this code hangs on gather?

hello boost users please look (if you find few minutes) code bellow first i'm declaring molecule class (with needed serialization) in main() first I'm building vector of data (which are molecules) then vector is divided to chunks and send in point to point nonblocking communication model to rest of mpi processes. after that some calculation on molecules are done. at the end I want gather all data but the program hangs on gather in proces with rank 0 does anyone has idea what is wrong in this code? tom #include <vector> #include <algorithm> #include <boost/mpi.hpp> #include <boost/serialization/string.hpp> #include <boost/serialization/vector.hpp> #include <boost/lexical_cast.hpp> #include <iostream> #include <cstdlib> #include <string> #include <boost/random/linear_congruential.hpp> #include <boost/random/uniform_real.hpp> #include <boost/random/variate_generator.hpp> #include <boost/format.hpp> namespace mpi = boost::mpi; class molecule { friend std::ostream & operator<<(std::ostream &os, const molecule &prot); friend class boost::serialization::access; template<class Archive> void serialize(Archive & ar, const unsigned int){ ar & x_ & y_ & z_ & number_ & message_; } public: // constructor molecule(){x_=0; y_=0; z_=0; number_= 0; message_ = "";}; molecule(double x, double y, double z, unsigned int n, std::string m) : x_(x), y_(y), z_(z), number_(n), message_(m) {} void print(){ std::cout << (boost::format("%.6f") % x_) << " | "<< (boost::format("%.6f") % y_) << " | " << (boost::format("%.6f") % z_) << " | " << number_ << "\t| " << message_ << std::endl; } double x_; double y_; double z_; unsigned int number_; std::string message_; }; int main(int argc, char* argv[]) { //generator boost::minstd_rand generator_real(42u); //distribution boost::uniform_real<> uni_dist_real(0,1); //functor boost::variate_generator<boost::minstd_rand, boost::uniform_real<> > uni_real(generator_real, uni_dist_real); mpi::environment env(argc, argv); mpi::communicator world; // data 
construction std::vector<molecule> data; if (world.rank() == 0) { for(unsigned int i=0;i<8020;++i){ std::string txt("see you later aligator"); molecule tmp(1,1,1,i,txt); data.push_back(tmp); } } //broadcast(world,data,0); //we could do it by broadcast (line above) - but lets do it by point to point with only amount of data we need //work vectors std::vector<molecule> res; std::vector<std::vector<molecule> > allRes; // we need to divide our data between processes unsigned int nProcs=world.size(); unsigned int chunkSize=data.size() / nProcs; unsigned int extraBits=data.size() % nProcs; // extra bits we will do in root process if( world.rank() == 0 ){ mpi::request reqs[nProcs-1]; for(unsigned int i=1;i<nProcs;++i){ std::vector<molecule> sendmol; unsigned int pos=extraBits+i*chunkSize; for(unsigned int j=0;j<chunkSize;++j){ sendmol.push_back(data[pos]); pos++; } reqs[i-1] = world.isend(i, 0, sendmol); std::cout<<"msg sended to "<< i <<std::endl;std::cout.flush(); } for(unsigned int i=0;i<chunkSize + extraBits;++i){ molecule tmp = data[i]; tmp.x_ *= uni_real(); tmp.y_ *= uni_real(); tmp.z_ *= uni_real(); tmp.message_ = "from process: " + boost::lexical_cast<std::string>(world.rank()); res.push_back(tmp); } mpi::wait_all(reqs, reqs + nProcs - 1); std::cout<<"calculation finished in 0 "<<std::endl;std::cout.flush(); } std::vector<molecule> recvmol; mpi::request recv; recv = world.irecv(0, 0, recvmol); recv.wait(); std::cout<<"receiving finished "<<world.rank()<<std::endl;std::cout.flush(); for(unsigned int i=0;i<recvmol.size();++i){ molecule tmp = recvmol[i]; tmp.x_ *= uni_real(); tmp.y_ *= uni_real(); tmp.z_ *= uni_real(); tmp.print(); tmp.message_ = "from process: " + boost::lexical_cast<std::string>(world.rank()); res.push_back(tmp); } std::cout<<"calculation finished in "<<world.rank()<<std::endl;std::cout.flush(); //wysylanie wynikow ze wszystkich procesow mpi do procesu 0 if( world.rank() == 0 ){ gather(world,res,allRes,0); std::cout<<"gather finished 
"<<world.rank()<<std::endl;std::cout.flush(); } else { gather(world,res,0); std::cout<<"gather finished"<<world.rank()<<std::endl;std::cout.flush(); } // output if(world.rank()==0){ for(unsigned int i=0;i<static_cast<unsigned int>(world.size());++i){ std::cout<<"results from process "<<i<<": "<<std::endl; for(int k=0; k<allRes[i].size();k++){ allRes[i][k].print(); } std::cout<<std::endl; } std::cout<<"making output finished "<<world.rank()<<std::endl;std::cout.flush(); } return 0; } ---------------------------------------------------- Kabaret Łowcy.B atakuje! Wygraj płytę DVD. Weź udział w konkursie: http://klik.wp.pl/?adr=http%3A%2F%2Fcorto.www.wp.pl%2Fas%2Fkonkurs_lowcy.html&sid=895

Hi, If I understood your code correctly, you are sending every piece of data from rank 0, and then it waits for the completion of those operations. Then every rank receives data, including the first rank. But the first rank is waiting for its own receive to finish, which can never happen. You should put the wait_all after the receptions. Matthieu 2009/10/27 tomasz jankowski <tomasz_jacek@wp.pl>:
hello boost users
Please look (if you can find a few minutes) at the code below. First I declare a molecule class (with the needed serialization). In main() I first build a vector of data (the molecules); then the vector is divided into chunks, which are sent to the rest of the MPI processes using non-blocking point-to-point communication. After that, some calculations on the molecules are done. At the end I want to gather all the data, but the program hangs on gather in the process with rank 0.
Does anyone have an idea what is wrong in this code?
tom
#include <vector> #include <algorithm> #include <boost/mpi.hpp> #include <boost/serialization/string.hpp> #include <boost/serialization/vector.hpp> #include <boost/lexical_cast.hpp> #include <iostream> #include <cstdlib> #include <string> #include <boost/random/linear_congruential.hpp> #include <boost/random/uniform_real.hpp> #include <boost/random/variate_generator.hpp> #include <boost/format.hpp>
namespace mpi = boost::mpi;
// A molecule record (three coordinates, a number and a text message) that
// Boost.Serialization can archive, so Boost.MPI can send it between ranks.
class molecule {
    // Declared as a friend only; no definition of this operator appears in
    // this listing.
    friend std::ostream& operator<<(std::ostream& os, const molecule& prot);
    friend class boost::serialization::access;

    // Boost.Serialization hook: archives every data member in declaration
    // order. The version argument is unused.
    template <class Archive>
    void serialize(Archive& ar, const unsigned int /*version*/) {
        ar & x_ & y_ & z_ & number_ & message_;
    }

public:
    // Default constructor: coordinates 0, number 0, empty message.
    molecule() : x_(0), y_(0), z_(0), number_(0), message_() {}

    // Constructs a molecule from explicit coordinates, number and message.
    molecule(double x, double y, double z, unsigned int n, std::string m)
        : x_(x), y_(y), z_(z), number_(n), message_(m) {}

    // Prints "x | y | z | number<TAB>| message" to std::cout, with each
    // coordinate formatted to six decimals. Does not modify the object, so it
    // is const and can also be called on const molecules.
    void print() const {
        std::cout << (boost::format("%.6f") % x_) << " | "
                  << (boost::format("%.6f") % y_) << " | "
                  << (boost::format("%.6f") % z_) << " | "
                  << number_ << "\t| " << message_ << std::endl;
    }

    double x_;
    double y_;
    double z_;
    unsigned int number_;
    std::string message_;
};
// Program entry point as posted in the question: rank 0 builds the data,
// sends one chunk to each other rank, each rank scales its molecules by random
// factors, and the per-rank results are gathered on rank 0.
// NOTE(review): this listing lost its line breaks in the mail archive, so
// several statements now share a line with a preceding // comment.
int main(int argc, char* argv[]) {
//generator boost::minstd_rand generator_real(42u); //distribution boost::uniform_real<> uni_dist_real(0,1); //functor boost::variate_generator<boost::minstd_rand, boost::uniform_real<> > uni_real(generator_real, uni_dist_real);
mpi::environment env(argc, argv); mpi::communicator world;
// data construction std::vector<molecule> data; if (world.rank() == 0) { for(unsigned int i=0;i<8020;++i){ std::string txt("see you later aligator"); molecule tmp(1,1,1,i,txt); data.push_back(tmp); } }
//broadcast(world,data,0); //we could do it by broadcast (line above) - but lets do it by point to point with only amount of data we need
//work vectors std::vector<molecule> res; std::vector<std::vector<molecule> > allRes; // we need to divide our data between processes unsigned int nProcs=world.size(); unsigned int chunkSize=data.size() / nProcs; unsigned int extraBits=data.size() % nProcs;
// extra bits we will do in root process if( world.rank() == 0 ){
// NOTE(review): the array length nProcs-1 is a runtime value; a runtime-sized
// array is a compiler extension, not standard C++.
mpi::request reqs[nProcs-1];
// One non-blocking send per destination rank 1..nProcs-1; rank 0 is never a
// destination.
for(unsigned int i=1;i<nProcs;++i){ std::vector<molecule> sendmol; unsigned int pos=extraBits+i*chunkSize;
for(unsigned int j=0;j<chunkSize;++j){ sendmol.push_back(data[pos]); pos++; }
reqs[i-1] = world.isend(i, 0, sendmol); std::cout<<"msg sended to "<< i <<std::endl;std::cout.flush(); }
// Rank 0 processes the first chunkSize + extraBits molecules itself.
for(unsigned int i=0;i<chunkSize + extraBits;++i){ molecule tmp = data[i]; tmp.x_ *= uni_real(); tmp.y_ *= uni_real(); tmp.z_ *= uni_real(); tmp.message_ = "from process: " + boost::lexical_cast<std::string>(world.rank()); res.push_back(tmp); }
mpi::wait_all(reqs, reqs + nProcs - 1); std::cout<<"calculation finished in 0 "<<std::endl;std::cout.flush();
}
// NOTE(review): the rank-0 branch closes on the line above, so the receive
// below runs on every rank, including rank 0. It waits for a message from
// rank 0 with tag 0, but the sends above only target ranks 1..nProcs-1, so
// nothing in this listing ever satisfies that wait on rank 0.
std::vector<molecule> recvmol; mpi::request recv; recv = world.irecv(0, 0, recvmol); recv.wait(); std::cout<<"receiving finished "<<world.rank()<<std::endl;std::cout.flush();
for(unsigned int i=0;i<recvmol.size();++i){ molecule tmp = recvmol[i]; tmp.x_ *= uni_real(); tmp.y_ *= uni_real(); tmp.z_ *= uni_real(); tmp.print(); tmp.message_ = "from process: " + boost::lexical_cast<std::string>(world.rank()); res.push_back(tmp); } std::cout<<"calculation finished in "<<world.rank()<<std::endl;std::cout.flush();
//sending results from all mpi processes to process 0 if( world.rank() == 0 ){ gather(world,res,allRes,0); std::cout<<"gather finished "<<world.rank()<<std::endl;std::cout.flush();
} else { gather(world,res,0); std::cout<<"gather finished"<<world.rank()<<std::endl;std::cout.flush();
}
// output if(world.rank()==0){ for(unsigned int i=0;i<static_cast<unsigned int>(world.size());++i){ std::cout<<"results from process "<<i<<": "<<std::endl;
// NOTE(review): k is a signed int compared against the unsigned size().
for(int k=0; k<allRes[i].size();k++){ allRes[i][k].print(); } std::cout<<std::endl;
} std::cout<<"making output finished "<<world.rank()<<std::endl;std::cout.flush();
}
return 0; }
---------------------------------------------------- Kabaret Łowcy.B atakuje! Wygraj płytę DVD. Weź udział w konkursie: http://klik.wp.pl/?adr=http%3A%2F%2Fcorto.www.wp.pl%2Fas%2Fkonkurs_lowcy.html&sid=895
_______________________________________________ Boost-users mailing list Boost-users@lists.boost.org http://lists.boost.org/mailman/listinfo.cgi/boost-users
-- Information System Engineer, Ph.D. Website: http://matthieu-brucher.developpez.com/ Blogs: http://matt.eifelle.com and http://blog.developpez.com/?blog=92 LinkedIn: http://www.linkedin.com/in/matthieubrucher

Thank You Matthieu for reply,
If I understood your code correctly, you are sending every piece of data through rank 0 and then it waits for the completion of the operations. Then, every rank receives data, as well as the first rank.
I'm sending from 0 to 1, 2, 3, 4, 5, 6, 7. I'm not sending from 0 to 0 (which, as I understand it, could cause problems).
But the first rank is waiting for its own receive to finish, which can never happen. You should put the wait all after the receptions.
(What do you mean by "first rank"? 1 or 0?) 1. I removed the wait after isend - it still hangs. 2. I don't need to wait in rank 0; I need to wait in the rest of the ranks, because they need the data for the calculation, so I changed mpi::request recv; recv = world.irecv(0, 0, recvmol); recv.wait(); to world.recv(0, 0, recvmol); - it still hangs. 3. I also removed the wait after isend and put it before gather in rank 0: if( world.rank() == 0 ){ mpi::wait_all(reqs, reqs + nProcs - 1); gather(world,res,allRes,0); } else { gather(world,res,0); } - it still hangs (it doesn't matter whether I use irecv or recv)... tom ---------------------------------------------------- Film Solista: 9/10 Jak dla mnie rewelacyjny! Piękny wzruszający głęboki! Trudna tematyka, ale pokazana w mistrzowski sposób. http://klik.wp.pl/?adr=http%3A%2F%2Fcorto.www.wp.pl%2Fas%2Fsolista.html&sid=900

Dnia 27-10-2009 o godz. 13:49 Matthieu Brucher napisał(a):
Hi,
If I understood your code correctly, you are sending every piece of data from rank 0, and then it waits for the completion of those operations. Then every rank receives data, including the first rank. But the first rank is waiting for its own receive to finish, which can never happen. You should put the wait_all after the receptions.
At last I have found it: the code executed by the rest of the ranks (non-0) should be enclosed in its own else { ... } block, so that rank 0 does not execute the receive:
if( world.rank() == 0 ){
mpi::request reqs[nProcs-1];
for(unsigned int i=1;i<nProcs;++i){ std::vector<molecule> sendmol; unsigned int pos=extraBits+i*chunkSize;
for(unsigned int j=0;j<chunkSize;++j){ sendmol.push_back(data[pos]); pos++; }
reqs[i-1] = world.isend(i, 0, sendmol); std::cout<<"msg sended to "<< i <<std::endl;std::cout.flush(); }
for(unsigned int i=0;i<chunkSize + extraBits;++i){ molecule tmp = data[i]; tmp.x_ *= uni_real(); tmp.y_ *= uni_real(); tmp.z_ *= uni_real(); tmp.message_ = "from process: " + boost::lexical_cast<std::string>(world.rank()); res.push_back(tmp); }
mpi::wait_all(reqs, reqs + nProcs - 1); std::cout<<"calculation finished in 0 "<<std::endl;std::cout.flush();
}
//here should be else{
std::vector<molecule> recvmol; mpi::request recv; recv = world.irecv(0, 0, recvmol); recv.wait(); std::cout<<"receiving finished "<<world.rank()<<std::endl;std::cout.flush();
for(unsigned int i=0;i<recvmol.size();++i){ molecule tmp = recvmol[i]; tmp.x_ *= uni_real(); tmp.y_ *= uni_real(); tmp.z_ *= uni_real(); tmp.print(); tmp.message_ = "from process: " + boost::lexical_cast<std::string>(world.rank()); res.push_back(tmp); } std::cout<<"calculation finished in "<<world.rank()<<std::endl;std::cout.flush();
//here block should be closed }
//wysylanie wynikow ze wszystkich procesow mpi do procesu 0 if( world.rank() == 0 ){ gather(world,res,allRes,0); std::cout<<"gather finished "<<world.rank()<<std::endl;std::cout.flush();
} else { gather(world,res,0); std::cout<<"gather finished"<<world.rank()<<std::endl;std::cout.flush();
}
// output if(world.rank()==0){ for(unsigned int i=0;i<static_cast<unsigned int>(world.size());++i){ std::cout<<"results from process "<<i<<": "<<std::endl;
for(int k=0; k<allRes[i].size();k++){ allRes[i][k].print(); } std::cout<<std::endl;
} std::cout<<"making output finished "<<world.rank()<<std::endl;std::cout.flush();
}
return 0; }
---------------------------------------------------- Film Solista: 9/10 Jak dla mnie rewelacyjny! Piękny wzruszający głęboki! Trudna tematyka, ale pokazana w mistrzowski sposób. http://klik.wp.pl/?adr=http%3A%2F%2Fcorto.www.wp.pl%2Fas%2Fsolista.html&sid=900
participants (2)
-
Matthieu Brucher
-
tomasz jankowski