有很多同学问:怎样把CUDA和其它编译器(如gcc、g++)配合使用,也就是进行混合编程?
先把代码贴出来:
文件1 : test1.cu
// File: test1.cu
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

#define ROWS 32
#define COLS 16

// Evaluate a CUDA runtime call (or a stored cudaError_t) exactly once; on
// failure print where and why it failed, then terminate with exit(-1).
// The do/while(0) wrapper makes the macro behave as a single statement.
#define CHECK(res)                                                        \
    do {                                                                  \
        cudaError_t check_err_ = (res);                                   \
        if (check_err_ != cudaSuccess) {                                  \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
                    cudaGetErrorString(check_err_));                      \
            exit(-1);                                                     \
        }                                                                 \
    } while (0)

// Fill a rows x cols matrix, addressed through an array of row pointers,
// with each element's row-major linear index.
// Expects a 2D launch: x covers columns, y covers rows. Threads that fall
// outside the matrix do nothing.
__global__ void Kerneltest(int **da, unsigned int rows, unsigned int cols)
{
    unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
    unsigned int col = blockDim.x * blockIdx.x + threadIdx.x;
    if (row < rows && col < cols)
    {
        da[row][col] = row * cols + col;
    }
}

// Run the demo: build a ROWS x COLS device matrix, fill it on the GPU,
// copy it back, print it and report whether every element matches its
// expected value. Returns 0; any CUDA or allocation failure terminates the
// process with exit(-1).
// Declared extern "C" so that both C and C++ callers can link against it.
extern "C" int func()
{
    int **da = NULL;  // device: table of row pointers
    int **ha = NULL;  // host staging copy of the row-pointer table
    int *dc = NULL;   // device: contiguous matrix storage
    int *hc = NULL;   // host: copy of the matrix read back from the device
    int r, c;
    bool is_right = true;

    CHECK(cudaMalloc((void**)(&da), ROWS * sizeof(int*)));
    CHECK(cudaMalloc((void**)(&dc), ROWS * COLS * sizeof(int)));

    ha = (int**)malloc(ROWS * sizeof(int*));
    hc = (int*)malloc(ROWS * COLS * sizeof(int));
    if (ha == NULL || hc == NULL)
    {
        fprintf(stderr, "host memory allocation failed\n");
        free(ha);
        free(hc);
        cudaFree((void*)da);
        cudaFree((void*)dc);
        exit(-1);
    }

    // The row pointers are device addresses computed on the host; they are
    // only stored here, never dereferenced on the host.
    for (r = 0; r < ROWS; r++)
    {
        ha[r] = dc + r * COLS;
    }
    CHECK(cudaMemcpy((void*)(da), (void*)(ha), ROWS * sizeof(int*), cudaMemcpyHostToDevice));

    dim3 dimBlock(16, 16);
    // Ceiling division so the grid covers the whole matrix.
    dim3 dimGrid((COLS + dimBlock.x - 1) / (dimBlock.x), (ROWS + dimBlock.y - 1) / (dimBlock.y));
    Kerneltest<<<dimGrid, dimBlock>>>(da, ROWS, COLS);
    // A kernel launch returns no status itself; launch errors must be
    // fetched explicitly.
    CHECK(cudaGetLastError());

    // Blocking copy on the default stream: it runs after the kernel, and
    // errors raised while the kernel executed are reported here.
    CHECK(cudaMemcpy((void*)(hc), (void*)(dc), ROWS * COLS * sizeof(int), cudaMemcpyDeviceToHost));

    for (r = 0; r < ROWS; r++)
    {
        for (c = 0; c < COLS; c++)
        {
            printf("%4d ", hc[r * COLS + c]);
            if (hc[r * COLS + c] != (r * COLS + c))
            {
                is_right = false;
            }
        }
        printf("\n");
    }
    printf("the result is %s!\n", is_right ? "right" : "false");

    free(ha);
    free(hc);
    CHECK(cudaFree((void*)da));
    CHECK(cudaFree((void*)dc));
    // getchar();
    return 0;
}
文件2:test2.c
#include <stdio.h>

/* Implemented in test1.cu and exported with C linkage (extern "C").
   Declared with a full prototype so the compiler can check the call. */
int func(void);

/* C driver: run the CUDA demo and pass its status on as the exit code. */
int main(void)
{
    return func();
}
文件3 :test3.cpp
#include <iostream>
using namespace std;

// func is implemented in test1.cu with C linkage, so the declaration here
// must also be extern "C"; otherwise the C++ compiler would look for a
// mangled symbol and the link would fail.
extern "C" int func();

// C++ driver: run the CUDA demo and pass its status on as the exit code.
int main()
{
    return func();
}
几个方案可以用:
方案1:
将所有文件分别编译,最后统一合并!
对于C程序
[]$nvcc -c test1.cu
[]$gcc -c test2.c
[]$gcc -o testc test1.o test2.o -lcudart -L/usr/local/cuda/lib64
C++ 程序
[]$nvcc -c test1.cu
[]$g++ -c test3.cpp
[]$g++ -o testcpp test1.o test3.o -lcudart -L/usr/local/cuda/lib64
方案2:
将CUDA程序弄成静态库
对于C程序
[]$nvcc -lib test1.cu -o libtestcu.a
[]$gcc test2.c -ltestcu -L. -lcudart -L/usr/local/cuda/lib64 -o testc
特别注意:test2.c 必须写在链接库(-ltestcu)的前面,因为链接器按从左到右的顺序解析符号,静态库要放在引用它的文件之后。
对于C++
完全与C类似,只要将gcc换成g++,test2.c换成test3.cpp
方案3:
将CUDA程序弄成动态库
makefile
# Build the CUDA code as a shared library and link a C and a C++ driver
# against it. Recipe lines must start with a TAB character.

# These targets do not produce files of the same name.
.PHONY : all c cpp clean

all : c cpp

# C driver linked against the shared CUDA library.
c : libtestcu.so
	gcc test2.c -ltestcu -L. -lcudart -L/usr/local/cuda/lib64 -o testc

# C++ driver linked against the shared CUDA library.
cpp : libtestcu.so
	g++ test3.cpp -ltestcu -L. -lcudart -L/usr/local/cuda/lib64 -o testcpp

# The prerequisite must be the file that is actually compiled (test1.cu).
# -Xcompiler -fPIC forwards -fPIC to the host compiler, as a shared
# library needs position-independent code.
libtestcu.so : test1.cu
	nvcc -o libtestcu.so -shared -Xcompiler -fPIC test1.cu

# Options go before the operands so that -f is honoured by every rm.
clean :
	rm -f *.so testc testcpp
应该能看懂。