[Boost-users] [boost::compute] [OS X 10.11.5] [GeForce GT 750M] Segmentation fault

19 May 2016

      Hi,
   Example code to sort a vector on the GPU (GeForce GT 750M) generates a SEGV.
   However, it runs fine on Iris Pro (Intel(R) Core(TM) i7-4850HQ CPU).

   Having said that, it works as long as the size of the vector being sorted is small (<2 million).
   The code throws an exception if the size of the vector is less than 9 million.
   Exception: boost/1_61_0/include/boost/compute/command_queue.hpp(453): Throw in 
function boost::compute::event 
boost::compute::command_queue::enqueue_write_buffer(const boost::compute::buffer
&, size_t, size_t, const void *, const boost::compute::wait_list &)
Dynamic exception type: 
boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector
<boost::compute::opencl_error> >
std::exception::what: Invalid Value
   Another observation: if the size of vector is 50 million the sorting works fine, 
though the timings are worse than Iris Pro.

Regards,
Prashant

----------------------------Cut here-------------------------------------
#include <algorithm>
#include <cerrno>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include <boost/foreach.hpp>
#include <boost/compute/core.hpp>
#include <boost/compute/platform.hpp>
#include <boost/compute/algorithm.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/functional/math.hpp>
#include <boost/compute/types/builtin.hpp>
#include <boost/compute/function.hpp>
#include <boost/chrono/include.hpp>
#include <boost/exception/all.hpp>

namespace compute = boost::compute;

int main(int argc, char* argv[])
{
    if (argc != 2) {
        std::cout << "Usage: " << argv[0] << " <size> " << std::endl;
        return 0;
    }

    // generate random data on the host
    std::vector<float> host_vector(atoi(argv[1]));
    std::generate(host_vector.begin(), host_vector.end(), rand);

    std::cout << 
"===============CPU==================\n";
    for (size_t k=0; k<5; k++)
    {
        std::vector<float> host_copy_vector(host_vector);
        auto start = std::chrono::high_resolution_clock::now();
        std::sort(host_copy_vector.begin(), host_copy_vector.end());

        auto duration = std::chrono::duration_cast<std::chrono::milliseconds>
(std::chrono::high_resolution_clock::now() - start);
        std::cout << "time: iteration ("<< k << ") : " << duration.count() << " ms" << 
std::endl;
    }

    std::vector<compute::platform> platforms = compute::system::platforms();

    for(size_t i = 0; i < platforms.size(); i++){
        const compute::platform &platform = platforms[i];
        std::cout << "Platform '" << platform.name() << "'" << std::endl;
        std::vector<compute::device> devices = platform.devices();

        for(size_t j = 0; j < devices.size(); j++){
            const compute::device &device = devices[j];

            std::string type;
            if(device.type() & compute::device::gpu)
                type = "GPU Device";
            else if(device.type() & compute::device::cpu)
                type = "CPU Device";
            else if(device.type() & compute::device::accelerator)
                type = "Accelerator Device";
            else
                type = "Unknown Device";

            if (type != "GPU Device") {
                std::cout << "Ignoring non GPU devices.\n";
                continue;
            }

            std::cout << 
"====\n";
            std::cout << "  " << type << ": " << device.name() << std::endl;
            std::cout << 
"====\n";
            compute::context context(device);
            compute::command_queue queue(context, device);

            for (size_t k=0; k<5; k++)
            {
                compute::vector<float> device_vector(host_vector.size(), context);

                // copy data from the host to the device
                compute::copy(
                    host_vector.begin(), host_vector.end(), device_vector.begin(), queue
                );

                auto start = std::chrono::high_resolution_clock::now();
                try {
                  compute::sort(device_vector.begin(), device_vector.end(), queue);
                } catch (boost::exception & e) {
                  std::cerr << diagnostic_information(e);
                  break;
                }

                auto duration = std::chrono::duration_cast<std::chrono::milliseconds>
(std::chrono::high_resolution_clock::now() - start);
                std::cout << "time: iteration ("<< k << ") : " << duration.count() << " ms" <<
std::endl;
            }

            std::cout << 
"====\n";
        }
    }

    return 0;
}