The question is: is there a way to use the class "vector" (std::vector) in CUDA kernels? When I try, I get the following error:
问题是:有没有办法在Cuda内核中使用类“向量”?当我尝试时,我收到以下错误:
error : calling a host function("std::vector<int, std::allocator<int> > ::push_back") from a __device__/__global__ function not allowed
So, is there a way to use a vector in a __global__ function? I recently tried the following:
那么有一种方法可以在全局部分使用向量吗?我最近尝试了以下内容:
- create a new Cuda project
- go to properties of the project
- open Cuda C/C++
- go to Device
- change the value in "Code Generation" to be set to this value:compute_20,sm_20
创建一个新的Cuda项目
转到项目的属性
打开Cuda C / C ++
转到设备
将“代码生成”中的值更改为此值:compute_20,sm_20
........ after that I was able to use the printf standard library function in my Cuda kernel.
........之后我能够在我的Cuda内核中使用printf标准库函数。
Is there a way to use the standard library class vector in kernel code, in the same way that printf is supported? This is an example of using printf in kernel code:
有没有办法在内核代码中支持printf的方式使用标准库类向量?这是在内核代码中使用printf的示例:
// Counts the 3s in an array using CUDA.
// private_count is an array that holds every thread's result separately:
// the thread with index t = threadIdx.x scans the `length` consecutive
// elements of `a` starting at t * length and increments private_count[t]
// once for each element equal to 3.
__global__ void countKernel(int *a, int length, int* private_count)
{
    const unsigned int lane = threadIdx.x;

    // Prints the thread id; printf is usable from kernel code.
    printf("%d\n",lane);

    // vector<int> y;
    // y.push_back(0);   // is there a possibility to do this?

    // Half-open range [first, last) of indices owned by this thread.
    const unsigned int first = lane * length;
    const unsigned int last  = first + length;

    for (int idx = first; idx < last; ++idx)
    {
        if (a[idx] != 3)
        {
            continue;
        }
        private_count[lane]++;
        printf("%d ",a[idx]);
    }
}
4 个解决方案
#1
You can't use the STL in CUDA, but you may be able to use the Thrust library to do what you want. Otherwise just copy the contents of the vector to the device and operate on it normally.
您不能在CUDA中使用STL,但您可以使用Thrust库来执行您想要的操作。否则只需将向量的内容复制到设备并正常操作即可。
#2
In the CUDA library Thrust, you can use thrust::device_vector<T> to define a vector on the device, and data transfer between a host STL vector and a device vector is very straightforward. You can refer to this link: http://docs.nvidia.com/cuda/thrust/index.html to find some useful examples.
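在CUDA的Thrust库中,您可以使用thrust::device_vector<T>在设备上定义向量,并且主机STL向量与设备向量之间的数据传输非常简单。您可以参考此链接:http://docs.nvidia.com/cuda/thrust/index.html 以找到一些有用的示例。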
#3
You can't use std::vector in device code; you should use an array instead.
你不能在设备代码中使用std :: vector,你应该使用数组。
#4
I think you can implement a device vector by yourself, because CUDA supports dynamic memory allocation in device code. Operators new/delete are also supported. Here is an extremely simple prototype of a device vector in CUDA; it does work, but it hasn't been tested sufficiently.
我认为您可以自己实现设备向量,因为CUDA支持设备代码中的动态内存分配。还支持运算符 new/delete。这是CUDA中一个非常简单的设备向量原型,它确实有效,但尚未经过充分测试。
// LocalVector<T>: a minimal growable array for use inside CUDA device code,
// where std::vector is not available. Storage is obtained with operator
// new[] from device code and released with delete[] in the destructor.
//
// Requirements on T: default-constructible (storage is created with
// new T[capacity]) and copy-assignable (elements are stored and relocated
// by assignment).
//
// NOTE: the class declares no copy constructor or copy assignment operator,
// so the implicitly generated ones copy the raw pointers. Copying an
// instance would make two destructors delete[] the same storage; pass
// instances by reference or pointer.
// NOTE(review): the result of new[] is not checked here; confirm how an
// exhausted device heap should be handled for your use case.
template<typename T>
class LocalVector
{
private:
    T* m_begin;        // first element of the current storage
    T* m_end;          // one past the last stored element
    size_t capacity;   // number of T slots in the current storage
    size_t length;     // number of stored elements (== m_end - m_begin)

    // Doubles the capacity and moves the stored elements into the new storage.
    __device__ void expand()
    {
        capacity *= 2;
        size_t tempLength = static_cast<size_t>(m_end - m_begin);
        T* tempBegin = new T[capacity];
        // Element-wise assignment instead of memcpy so that types that are
        // not trivially copyable are relocated correctly as well.
        for (size_t i = 0; i < tempLength; ++i)
        {
            tempBegin[i] = m_begin[i];
        }
        delete[] m_begin;
        m_begin = tempBegin;
        m_end = m_begin + tempLength;
        length = tempLength;
    }

public:
    // Creates an empty vector with room for 16 elements.
    // Initializers are listed in member declaration order.
    __device__ explicit LocalVector()
        : m_begin(nullptr), m_end(nullptr), capacity(16), length(0)
    {
        m_begin = new T[capacity];
        m_end = m_begin;
    }

    // Unchecked element access; index must be less than getSize().
    __device__ T& operator[] (unsigned int index)
    {
        return *(m_begin + index);
    }

    // Pointer to the first stored element.
    __device__ T* begin()
    {
        return m_begin;
    }

    // Pointer one past the last stored element.
    __device__ T* end()
    {
        return m_end;
    }

    __device__ ~LocalVector()
    {
        delete[] m_begin;
        m_begin = nullptr;
    }

    // Appends a copy of t, growing the storage first when it is full.
    __device__ void add(T t)
    {
        if (length >= capacity)
        {
            expand();
        }
        // The slot already holds a default-constructed T (from new T[]),
        // so assign to it rather than placement-constructing over it.
        *m_end = t;
        m_end++;
        length++;
    }

    // Removes the last element and returns a copy of it.
    // Returns a default-constructed T when the vector is empty.
    __device__ T pop()
    {
        if (length == 0)
        {
            return T();
        }
        // The last element lives at m_end - 1. Individual elements of a
        // new[] array must not be passed to delete, so only the bookkeeping
        // changes here; the storage is released as a whole in the destructor.
        --m_end;
        --length;
        return *m_end;
    }

    // Number of stored elements.
    __device__ size_t getSize() const
    {
        return length;
    }
};
#1
You can't use the STL in CUDA, but you may be able to use the Thrust library to do what you want. Otherwise just copy the contents of the vector to the device and operate on it normally.
您不能在CUDA中使用STL,但您可以使用Thrust库来执行您想要的操作。否则只需将向量的内容复制到设备并正常操作即可。
#2
In the CUDA library Thrust, you can use thrust::device_vector<T> to define a vector on the device, and data transfer between a host STL vector and a device vector is very straightforward. You can refer to this link: http://docs.nvidia.com/cuda/thrust/index.html to find some useful examples.
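在CUDA的Thrust库中,您可以使用thrust::device_vector<T>在设备上定义向量,并且主机STL向量与设备向量之间的数据传输非常简单。您可以参考此链接:http://docs.nvidia.com/cuda/thrust/index.html 以找到一些有用的示例。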
#3
You can't use std::vector in device code; you should use an array instead.
你不能在设备代码中使用std :: vector,你应该使用数组。
#4
I think you can implement a device vector by yourself, because CUDA supports dynamic memory allocation in device code. Operators new/delete are also supported. Here is an extremely simple prototype of a device vector in CUDA; it does work, but it hasn't been tested sufficiently.
我认为您可以自己实现设备向量,因为CUDA支持设备代码中的动态内存分配。还支持运算符 new/delete。这是CUDA中一个非常简单的设备向量原型,它确实有效,但尚未经过充分测试。
// LocalVector<T>: a minimal growable array for use inside CUDA device code,
// where std::vector is not available. Storage is obtained with operator
// new[] from device code and released with delete[] in the destructor.
//
// Requirements on T: default-constructible (storage is created with
// new T[capacity]) and copy-assignable (elements are stored and relocated
// by assignment).
//
// NOTE: the class declares no copy constructor or copy assignment operator,
// so the implicitly generated ones copy the raw pointers. Copying an
// instance would make two destructors delete[] the same storage; pass
// instances by reference or pointer.
// NOTE(review): the result of new[] is not checked here; confirm how an
// exhausted device heap should be handled for your use case.
template<typename T>
class LocalVector
{
private:
    T* m_begin;        // first element of the current storage
    T* m_end;          // one past the last stored element
    size_t capacity;   // number of T slots in the current storage
    size_t length;     // number of stored elements (== m_end - m_begin)

    // Doubles the capacity and moves the stored elements into the new storage.
    __device__ void expand()
    {
        capacity *= 2;
        size_t tempLength = static_cast<size_t>(m_end - m_begin);
        T* tempBegin = new T[capacity];
        // Element-wise assignment instead of memcpy so that types that are
        // not trivially copyable are relocated correctly as well.
        for (size_t i = 0; i < tempLength; ++i)
        {
            tempBegin[i] = m_begin[i];
        }
        delete[] m_begin;
        m_begin = tempBegin;
        m_end = m_begin + tempLength;
        length = tempLength;
    }

public:
    // Creates an empty vector with room for 16 elements.
    // Initializers are listed in member declaration order.
    __device__ explicit LocalVector()
        : m_begin(nullptr), m_end(nullptr), capacity(16), length(0)
    {
        m_begin = new T[capacity];
        m_end = m_begin;
    }

    // Unchecked element access; index must be less than getSize().
    __device__ T& operator[] (unsigned int index)
    {
        return *(m_begin + index);
    }

    // Pointer to the first stored element.
    __device__ T* begin()
    {
        return m_begin;
    }

    // Pointer one past the last stored element.
    __device__ T* end()
    {
        return m_end;
    }

    __device__ ~LocalVector()
    {
        delete[] m_begin;
        m_begin = nullptr;
    }

    // Appends a copy of t, growing the storage first when it is full.
    __device__ void add(T t)
    {
        if (length >= capacity)
        {
            expand();
        }
        // The slot already holds a default-constructed T (from new T[]),
        // so assign to it rather than placement-constructing over it.
        *m_end = t;
        m_end++;
        length++;
    }

    // Removes the last element and returns a copy of it.
    // Returns a default-constructed T when the vector is empty.
    __device__ T pop()
    {
        if (length == 0)
        {
            return T();
        }
        // The last element lives at m_end - 1. Individual elements of a
        // new[] array must not be passed to delete, so only the bookkeeping
        // changes here; the storage is released as a whole in the destructor.
        --m_end;
        --length;
        return *m_end;
    }

    // Number of stored elements.
    __device__ size_t getSize() const
    {
        return length;
    }
};