OpenACC 简单的直方图

时间:2024-06-07 22:04:14

▶ 简单的直方图,强调原子操作的使用

● 代码

 #include <stdio.h>
#include <stdlib.h>
#include <openacc.h>

/*
 * Minimal OpenACC histogram: fill `data` with pseudo-random bucket indices,
 * count them in `histgram` inside a parallel loop, then print every bucket.
 * The increment is protected by `atomic update` because different iterations
 * can hit the same bucket.
 *
 * No data clauses are written here, so the compiler picks the data movement
 * for `data` and `histgram` itself (see the -Minfo log after this listing).
 */
int main()
{
    /*
     * NOTE(review): the numeric literals of this listing were lost when the
     * page was extracted. basket = 10 matches the ten histgram[...] lines in
     * the output shown after this listing; length and the srand() seed are
     * placeholders -- confirm against the original post.
     */
    const int length = 1024, basket = 10;
    int data[length], histgram[basket];

    srand(1);

    /* Clear every bucket on the host before counting. */
    for (int i = 0; i < basket; histgram[i++] = 0);

    /* Taking the remainder by basket keeps every index inside histgram. */
    for (int i = 0; i < length; data[i++] = rand() % basket);

#pragma acc parallel loop
    for (int i = 0; i < length; i++)
    {
        /* Several iterations may target the same bucket, so the
         * read-modify-write has to be atomic. */
#pragma acc atomic update
        histgram[data[i]] += 1;
    }

    for (int i = 0; i < basket; i++)
        printf("histgram[%d] = %d\n", i, histgram[i]);

    /* Wait for a key press so the console window stays open. */
    getchar();
    return 0;
}

● 输出结果,在 Windows 里是错的,在 WSL 里是对的

D:\Code\OpenACC\OpenACCProject\OpenACCProject>pgcc -acc -Minfo main.c -o main_acc.exe
main:
, Accelerator kernel generated
Generating Tesla code
, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
, Generating implicit copyout(histgram[data])
Generating implicit copyin(data[:])

D:\Code\OpenACC\OpenACCProject\OpenACCProject>main_acc.exe
launch CUDA kernel file=D:\Code\OpenACC\OpenACCProject\OpenACCProject\main.c function=main line= device= threadid= num_gangs= num_workers= vector_length= grid= block=
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
cuan@CUAN:/mnt/d/Code/OpenACC/OpenACCProject/OpenACCProject$ pgcc -acc -Minfo main.c -o main_acc_ubuntu.exe
main:
, Accelerator kernel generated
Generating Tesla code
, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
, Generating implicit copyout(histgram[data])
Generating implicit copyin(data[:])
cuan@CUAN:/mnt/d/Code/OpenACC/OpenACCProject/OpenACCProject$ ./main_acc_ubuntu.exe
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =
histgram[] =

● 添加一个中间变量用来存储 data[i],在 Windows 下也正确了

 #include <stdio.h>
#include <stdlib.h>
#include <openacc.h>

/*
 * OpenACC histogram, variant that reads data[i] into a scalar before the
 * atomic update instead of indexing histgram with data[i] directly.
 * Fills `data` with pseudo-random bucket indices, counts them in `histgram`
 * inside a parallel loop, then prints every bucket.
 */
int main()
{
    /*
     * NOTE(review): the numeric literals of this listing were lost when the
     * page was extracted. basket = 10 matches the ten histgram[...] lines in
     * the sample output earlier in the post; length and the srand() seed are
     * placeholders -- confirm against the original post.
     */
    const int length = 1024, basket = 10;
    int data[length], histgram[basket];

    srand(1);

    /* Clear every bucket on the host before counting. */
    for (int i = 0; i < basket; histgram[i++] = 0);

    /* Taking the remainder by basket keeps every index inside histgram. */
    for (int i = 0; i < length; data[i++] = rand() % basket);

    int temp; /* newly introduced variable that holds the bucket index */

    /* temp is declared outside the loop and written by every iteration, so
     * it is listed as private to give each iteration its own copy. */
#pragma acc parallel loop private(temp)
    for (int i = 0; i < length; i++)
    {
        temp = data[i]; /* compute the index on its own before the atomic operation */
#pragma acc atomic update
        histgram[temp] += 1; /* use temp as the index */
    }

    for (int i = 0; i < basket; i++)
        printf("histgram[%d] = %d\n", i, histgram[i]);

    /* Wait for a key press so the console window stays open. */
    getchar();
    return 0;
}