CUDA Illegal memory access when using virtual class

63 Views Asked by At
                         Base
                         /  \
                        /    \
                       /      \
                    Der1      Der2
                       \      /
                        \    /
                         \  /
                         Join

I have been working on code that relies on a diamond class inheritance structure. Following some articles online (and a very popular FAQ on the dreaded diamond class inheritance), I switched the middle classes of the diamond to inherit virtually from the base class. Now, when I construct the Join class and pass it to a kernel, I encounter an illegal memory access.

I would like to understand whether what I am doing is fundamentally wrong, so I created a simple example that shows when the error occurs and when it doesn't.

A simple test kernel:

/**
 * Debug kernel: queries the container received by value and prints the
 * results with device-side printf.
 *
 * `join_t` must expose `get_value_at(int)` and `get_total_size()` as
 * device-callable members. There is no thread-index guard, so every launched
 * thread performs both queries and prints both lines.
 */
template<typename join_t>
__global__ void kernel(join_t monster) {
  // Both queries are issued before anything is printed.
  float const second_element = monster.get_value_at(1);
  int const total_size       = monster.get_total_size();

  printf("value[1] = %f\n", second_element);
  printf("size = %i\n", total_size);
}

My classes (I can recreate the issue with just 3 classes, I don't even need the full diamond):

/**
 * Abstract root of the hierarchy. Stores a size contribution (`base_size`)
 * that derived classes combine with their own size in `get_total_size()`.
 *
 * NOTE(review): this type declares a virtual function. The CUDA C++
 * Programming Guide does not allow objects of classes with virtual functions
 * (or with virtual base classes) to be passed as arguments to a __global__
 * function; the hidden vtable pointer set up by a host-side constructor is a
 * host address. Construct such objects in device code if they must be used
 * polymorphically there.
 */
struct base {
  // Host-only constructor (no execution-space specifier).
  base() {}

  // Total size as defined by the most-derived class.
  __host__ __device__
  virtual int get_total_size() const = 0;

  // Host-only setter for the base contribution to the total size.
  void set_base_size(int const& s) { base_size = s; }
  protected:
    // Zero by default so get_total_size() never reads an indeterminate value
    // when set_base_size() has not been called yet.
    int base_size = 0;
};

/**
 * First arm of the diamond: a non-owning view over a float array plus its
 * element count, virtually derived from `base`.
 *
 * NOTE(review): because `base` is a virtual base, reaching `base_size` from
 * here goes through the object's hidden virtual-base bookkeeping. The CUDA
 * C++ Programming Guide does not allow objects of classes derived from
 * virtual base classes to be passed as __global__ function arguments.
 */
struct der1 : public virtual base {

  der1() : base() {}

  // Non-owning pointer to the data; null until assigned by the caller.
  // Must be dereferenceable in whichever execution space calls get_value_at().
  float* ptr1 = nullptr;
  // Number of elements reachable through ptr1; zero until assigned.
  int size1 = 0;

  // Returns element `i` of the viewed array. No bounds or null check is
  // performed; the caller must ensure 0 <= i < size1 and ptr1 is valid.
  __host__ __device__ 
  float get_value_at(int const& i) const {
    return ptr1[i];
  }

  // Number of elements in this arm only.
  __host__ __device__ 
  int get_size() const { return size1; }

  // Base contribution plus this arm's element count.
  __host__ __device__
  int get_total_size() const override {
    return base::base_size + get_size();
  }
};

/**
 * Bottom of the diamond. Currently only the `der1` arm is wired in; the
 * `der2` arm is left commented out.
 */
struct join : public der1/* , public der2 */ {
  // The most-derived class initialises the virtual base itself.
  join()
    : base{}
    , der1{}
    /* , der2{} */
  {}

  // Forwards to the der1 implementation with an explicitly qualified
  // (non-virtual) call.
  __host__ __device__
  int get_total_size() const override {
    int const total = this->der1::get_total_size();
    return total;
  }
};

Some testing code:

/**
 * Builds a `join` that views the storage of `v`.
 *
 * @param v          Vector whose `data()` can be passed to
 *                   `thrust::raw_pointer_cast` and whose `size()` gives the
 *                   element count. The returned object does not own the
 *                   storage, so `v` must outlive it.
 * @param base_size  Value stored as the base contribution to the total size.
 *                   Defaults to 10, the value previously hard-coded here.
 * @return A `join` by value with `ptr1`, `size1` and the base size set.
 */
template<typename vector_struct_t>
auto set_smart(vector_struct_t& v, int base_size = 10) {

  join my_container;

  my_container.ptr1 = thrust::raw_pointer_cast(v.data());

  my_container.set_base_size(base_size);
  // size() returns an unsigned type; make the narrowing to int explicit.
  my_container.size1 = static_cast<int>(v.size());

  return my_container;
}

/**
 * Test driver: fills a host vector with 0..9, copies it to the device, wraps
 * the device storage in a `join`, and launches the debug kernel on a single
 * thread.
 *
 * Returns EXIT_FAILURE if any checked CUDA call reports an error, otherwise
 * EXIT_SUCCESS.
 */
int
main(int argc, char** argv)
{
  cudaError_t status = cudaSuccess;

  // let's use thrust vector<type_t> for initial arrays
  thrust::host_vector<value_t>   h_vector(10);
  for (index_t i = 0; i < 10; i++)
    h_vector[i] = i;

  thrust::device_vector<value_t>  d_vector = h_vector;

  auto my_container = set_smart(d_vector);

  // Surface any error left over from the setup above before launching.
  status = cudaDeviceSynchronize();
  if(cudaSuccess != status) return EXIT_FAILURE;

  // Device Output
  kernel<<<1, 1>>>(my_container);

  // A launch does not return a status; launch-time failures are reported
  // through cudaGetLastError().
  status = cudaGetLastError();
  if(cudaSuccess != status) return EXIT_FAILURE;

  // Faults raised while the kernel runs (e.g. an illegal memory access) only
  // become visible at the next synchronising call, so synchronise after the
  // launch and check that result rather than a stale status.
  status = cudaDeviceSynchronize();
  if(cudaSuccess != status) return EXIT_FAILURE;

  return EXIT_SUCCESS;
}

What I get as an output:

what():  an illegal memory access was encountered

What I expect (note that this code runs fine if I don't use the keyword virtual for the der1 class):

value[1] = 1.000000
size = 20
0

There are 0 best solutions below