使用 CMake 混合编译 CUDA 并在 VS 中调试

完整代码：

/**  **/
#include <>
#include ""

/*
 * Kernel: element-wise square.
 * Each thread reads the element of d_in selected by its threadIdx.x and
 * stores that value multiplied by itself at the same position in d_out.
 *
 * Only threadIdx.x is used for indexing (no blockIdx term) and there is no
 * bounds check, so the caller must launch no more threads per block than
 * there are elements in both buffers.
 */
__global__ void square(float* d_out, float* d_in){
    const int tid = threadIdx.x;
    const float value = d_in[tid];
    d_out[tid] = value * value;
}

/*
 * Squares the values 0..7 on the GPU and prints the results to stdout,
 * four tab-separated values per line.
 *
 * Steps: fill a host array, allocate two device buffers, copy the input to
 * the device, launch square with one block of ARRAY_SIZE threads, copy the
 * result back and print it.
 *
 * Every CUDA runtime call is checked. After the first failure the remaining
 * steps are skipped, the error is reported on stderr, and nothing is printed
 * to stdout (h_out would hold uninitialised data). The device buffers are
 * released on every path.
 *
 * Returns 0 on success, 1 if any CUDA call or the kernel launch failed.
 */
int deploy(){
    const int ARRAY_SIZE = 8;
    const int ARRAY_BYTES = ARRAY_SIZE * sizeof(float);

    float h_in[ARRAY_SIZE];
    float h_out[ARRAY_SIZE];
    for(int i = 0; i < ARRAY_SIZE; i++){
        h_in[i] = float(i);
    }

    /* Start as null so the cudaFree calls below are safe even when an
       allocation never happened (cudaFree on a null pointer is a no-op). */
    float* d_in = NULL;
    float* d_out = NULL;

    cudaError_t err = cudaMalloc((void**) &d_in, ARRAY_BYTES);
    if( err == cudaSuccess )
        err = cudaMalloc((void**) &d_out, ARRAY_BYTES);

    if( err == cudaSuccess )
        err = cudaMemcpy(d_in, h_in, ARRAY_BYTES, cudaMemcpyHostToDevice);

    if( err == cudaSuccess ){
        square<<<1, ARRAY_SIZE>>>(d_out, d_in);
        /* A kernel launch returns nothing; launch/configuration errors
           are only visible through cudaGetLastError(). */
        err = cudaGetLastError();
    }

    /* This blocking copy also waits for the kernel, so an error raised
       while the kernel was executing is reported here. */
    if( err == cudaSuccess )
        err = cudaMemcpy(h_out, d_out, ARRAY_BYTES, cudaMemcpyDeviceToHost);

    if( err == cudaSuccess ){
        for(int i = 0; i < ARRAY_SIZE; i++){
            printf("%f", h_out[i]);
            if( i % 4 != 3 )
                printf("\t");
            else
                printf("\n");
        }
    }
    else{
        fprintf(stderr, "deploy: CUDA error: %s\n", cudaGetErrorString(err));
    }

    cudaFree(d_in);
    cudaFree(d_out);

    return err == cudaSuccess ? 0 : 1;
}

/*  */
/*
 * The gpu_api macro doubles as the include guard: everything up to the
 * #endif is processed only while gpu_api is still undefined.
 * gpu_api expands to __declspec(dllexport), which is attached to the
 * declaration of deploy() below.
 */
#if !defined(gpu_api)
#define gpu_api __declspec(dllexport)

/* deploy() is declared with C linkage; it takes no arguments and returns int. */
extern "C" {
    gpu_api int deploy();
}

#endif

/*  */
#include "cuda/"
#include <iostream>
using namespace std;
#pragma comment(lib, "")

/*
 * Program entry point: calls deploy() and uses the value it returns as the
 * process exit code, instead of discarding it and always exiting with 0.
 * deploy() is presumably declared by the header included at the top of this
 * snippet (the include path is not visible here).
 */
int main(){
    return deploy();
}

秒客网

使用 CMake 混合编译 CUDA 并在 VS 中调试

NVCC的编译平台报错

相关文章