Hi,
The example code below, which sorts a vector on a GPU (GeForce GT 750M), crashes with a SEGV.
However, it runs fine on Iris Pro (Intel(R) Core(TM) i7-4850HQ CPU).
That said, it works as long as the size of the vector being sorted is small (< 2 million).
The code throws an exception if the size of the vector is less than 9 million.
Exception: boost/1_61_0/include/boost/compute/command_queue.hpp(453): Throw in
function boost::compute::event
boost::compute::command_queue::enqueue_write_buffer(const boost::compute::buffer
&, size_t, size_t, const void *, const boost::compute::wait_list &)
Dynamic exception type:
boost::exception_detail::clone_impl<boost::compute::opencl_error>
std::exception::what: Invalid Value
Another observation: if the size of the vector is 50 million, the sorting works fine,
though the timings are worse than on the Iris Pro.
Regards,
Prashant
----------------------------Cut here-------------------------------------
#include <iostream>
#include <vector>
#include <algorithm>
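#include <chrono>
#include <cstdlib>
#include <string>
#include <boost/compute/system.hpp>
#include <boost/compute/context.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/copy.hpp>
#include <boost/compute/algorithm/sort.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/exception/diagnostic_information.hpp>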
namespace compute = boost::compute;
int main(int argc, char* argv[])
{
if (argc != 2) {
std::cout << "Usage: " << argv[0] << " <size> " << std::endl;
return 0;
}
// generate random data on the host
std::vector<float> host_vector(atoi(argv[1]));
std::generate(host_vector.begin(), host_vector.end(), rand);
std::cout <<
"===============CPU==================\n";
for (size_t k=0; k<5; k++)
{
std::vector<float> host_copy_vector(host_vector);
auto start = std::chrono::high_resolution_clock::now();
std::sort(host_copy_vector.begin(), host_copy_vector.end());
auto duration = std::chrono::duration_caststd::chrono::milliseconds
(std::chrono::high_resolution_clock::now() - start);
std::cout << "time: iteration ("<< k << ") : " << duration.count() << " ms" <<
std::endl;
}
std::vectorcompute::platform platforms = compute::system::platforms();
for(size_t i = 0; i < platforms.size(); i++){
const compute::platform &platform = platforms[i];
std::cout << "Platform '" << platform.name() << "'" << std::endl;
std::vectorcompute::device devices = platform.devices();
for(size_t j = 0; j < devices.size(); j++){
const compute::device &device = devices[j];
std::string type;
if(device.type() & compute::device::gpu)
type = "GPU Device";
else if(device.type() & compute::device::cpu)
type = "CPU Device";
else if(device.type() & compute::device::accelerator)
type = "Accelerator Device";
else
type = "Unknown Device";
if (type != "GPU Device") {
std::cout << "Ignoring non GPU devices.\n";
continue;
}
std::cout <<
"====\n";
std::cout << " " << type << ": " << device.name() << std::endl;
std::cout <<
"====\n";
compute::context context(device);
compute::command_queue queue(context, device);
for (size_t k=0; k<5; k++)
{
compute::vector<float> device_vector(host_vector.size(), context);
// copy data from the host to the device
compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
auto start = std::chrono::high_resolution_clock::now();
try {
compute::sort(device_vector.begin(), device_vector.end(), queue);
} catch (boost::exception & e) {
std::cerr << diagnostic_information(e);
break;
}
auto duration = std::chrono::duration_caststd::chrono::milliseconds
(std::chrono::high_resolution_clock::now() - start);
std::cout << "time: iteration ("<< k << ") : " << duration.count() << " ms" <<
std::endl;
}
std::cout <<
"====\n";
}
}
return 0;
}