cc_staff
1,486
edits
No edit summary |
(consistent indentation) |
||
Line 105: | Line 105: | ||
The following example shows how to add two numbers on the GPU using CUDA. Note that this is just a very simple exercise, so don't expect to see any actual acceleration. | The following example shows how to add two numbers on the GPU using CUDA. Note that this is just a very simple exercise, so don't expect to see any actual acceleration. | ||
<syntaxhighlight lang="cpp" line highlight="1,5"> | <syntaxhighlight lang="cpp" line highlight="1,5"> | ||
// Kernel: reads the ints pointed to by a and b and stores their sum through c.
// There is no per-thread indexing: every launched thread performs the same
// single read/add/write, so it is meant to be launched as <<< 1, 1 >>>.
// The pointers are dereferenced on the device, so they must refer to memory
// the GPU can access (e.g. buffers obtained from cudaMalloc).
__global__ void add (int *a, int *b, int *c){
    *c = *a + *b;
}
// Host driver: adds two ints on the GPU with the add() kernel.
// Steps: allocate three one-int device buffers, copy a and b to the device,
// launch add() with one block of one thread, copy the result back into c,
// then free the device buffers.
// Returns 0 when every CUDA call succeeded, 1 otherwise.
int main(void){
    int a, b, c;
    // Start as null so the cudaFree calls below are harmless no-ops if an
    // allocation never happened.
    int *dev_a = nullptr, *dev_b = nullptr, *dev_c = nullptr;
    int size = sizeof(int);
    cudaError_t status;

    // allocate device copies of a, b, c
    status = cudaMalloc ( (void**) &dev_a, size);
    if (status == cudaSuccess) status = cudaMalloc ( (void**) &dev_b, size);
    if (status == cudaSuccess) status = cudaMalloc ( (void**) &dev_c, size);

    a=2; b=7;

    // copy inputs to device
    if (status == cudaSuccess) status = cudaMemcpy (dev_a, &a, size, cudaMemcpyHostToDevice);
    if (status == cudaSuccess) status = cudaMemcpy (dev_b, &b, size, cudaMemcpyHostToDevice);

    // launch add() kernel on GPU, passing parameters
    if (status == cudaSuccess) {
        add <<< 1, 1 >>> (dev_a, dev_b, dev_c);
        // A launch does not return an error itself; query it explicitly.
        status = cudaGetLastError();
    }

    // copy device result back to host; this blocking copy also waits for the
    // kernel to finish and reports any error raised while it was running
    if (status == cudaSuccess) status = cudaMemcpy (&c, dev_c, size, cudaMemcpyDeviceToHost);

    // release device memory on both the success and the failure path
    cudaFree ( dev_a ); cudaFree ( dev_b ); cudaFree ( dev_c );

    return (status == cudaSuccess) ? 0 : 1;
}
</syntaxhighlight> | </syntaxhighlight> |