
Hi, Example code to sort a vector on GPU (GeForce GT 750M) generates SEGV. However, it runs fine on Iris Pro (Intel(R) Core(TM) i7-4850HQ CPU). Having said that, it works as long as size of vector being sorted is small(<2 million). The code throws an exception if the size of the vector is less 9 million. Exception: boost/1_61_0/include/boost/compute/command_queue.hpp(453): Throw in function boost::compute::event boost::compute::command_queue::enqueue_write_buffer(const boost::compute::buffer &, size_t, size_t, const void *, const boost::compute::wait_list &) Dynamic exception type: boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector <boost::compute::opencl_error> > std::exception::what: Invalid Value Another observation: if the size of vector is 50 million the sorting works fine, though the timings are worse than Iris Pro. Regards, Prashant ----------------------------Cut here------------------------------------- #include <iostream> #include <vector> #include <algorithm> #include <boost/foreach.hpp> #include <boost/compute/core.hpp> #include <boost/compute/platform.hpp> #include <boost/compute/algorithm.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/functional/math.hpp> #include <boost/compute/types/builtin.hpp> #include <boost/compute/function.hpp> #include <boost/chrono/include.hpp> #include <boost/exception/all.hpp> namespace compute = boost::compute; int main(int argc, char* argv[]) { if (argc != 2) { std::cout << "Usage: " << argv[0] << " <size> " << std::endl; return 0; } // generate random data on the host std::vector<float> host_vector(atoi(argv[1])); std::generate(host_vector.begin(), host_vector.end(), rand); std::cout << "===============CPU==================\n"; for (size_t k=0; k<5; k++) { std::vector<float> host_copy_vector(host_vector); auto start = std::chrono::high_resolution_clock::now(); std::sort(host_copy_vector.begin(), host_copy_vector.end()); auto duration = std::chrono::duration_cast<std::chrono::milliseconds> (std::chrono::high_resolution_clock::now() - start); std::cout << "time: iteration ("<< k << ") : " << duration.count() << " ms" << std::endl; } std::vector<compute::platform> platforms = compute::system::platforms(); for(size_t i = 0; i < platforms.size(); i++){ const compute::platform &platform = platforms[i]; std::cout << "Platform '" << platform.name() << "'" << std::endl; std::vector<compute::device> devices = platform.devices(); for(size_t j = 0; j < devices.size(); j++){ const compute::device &device = devices[j]; std::string type; if(device.type() & compute::device::gpu) type = "GPU Device"; else if(device.type() & compute::device::cpu) type = "CPU Device"; else if(device.type() & compute::device::accelerator) type = "Accelerator Device"; else type = "Unknown Device"; if (type != "GPU Device") { std::cout << "Ignoring non GPU devices.\n"; continue; } std::cout << "====\n"; std::cout << " " << type << ": " << device.name() << std::endl; std::cout << "====\n"; compute::context context(device); compute::command_queue queue(context, device); for (size_t k=0; k<5; k++) { compute::vector<float> device_vector(host_vector.size(), context); // copy data from the host to the device compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); auto start = std::chrono::high_resolution_clock::now(); try { compute::sort(device_vector.begin(), device_vector.end(), queue); } catch (boost::exception & e) { std::cerr << diagnostic_information(e); break; } auto duration = std::chrono::duration_cast<std::chrono::milliseconds> (std::chrono::high_resolution_clock::now() - start); std::cout << "time: iteration ("<< k << ") : " << duration.count() << " ms" << std::endl; } std::cout << "====\n"; } } return 0; }