CUDA tutorial: Difference between revisions

Jump to navigation Jump to search
consistent indentation
No edit summary
(consistent indentation)
Line 105: Line 105:
The following example shows how to add two numbers on the GPU using CUDA. Note that this is just a very simple exercise, so don't expect to see any actual acceleration.
The following example shows how to add two numbers on the GPU using CUDA. Note that this is just a very simple exercise, so don't expect to see any actual acceleration.
<syntaxhighlight lang="cpp" line highlight="1,5">
<syntaxhighlight lang="cpp" line highlight="1,5">
__global__  void add (int *a, int *b, int *c){
__global__ void add (int *a, int *b, int *c){  // __global__ marks a kernel: it is launched from host code with <<< >>> and runs on the GPU
  *c = *a + *b;  // a, b and c are dereferenced in device code, so they must point to memory the GPU can access
}


<!--T:11-->
*c = *a + *b;
}
int main(void){
int main(void){
int a, b, c;
  int a, b, c;
int *dev_a, *dev_b, *dev_c;
  int *dev_a, *dev_b, *dev_c;
int size = sizeof(int);
  int size = sizeof(int);


<!--T:12-->
  //  allocate device copies of a,b, c
//  allocate device copies of a,b, c
  cudaMalloc ( (void**) &dev_a, size);
cudaMalloc ( (void**) &dev_a, size);
  cudaMalloc ( (void**) &dev_b, size);
cudaMalloc ( (void**) &dev_b, size);
  cudaMalloc ( (void**) &dev_c, size);
cudaMalloc ( (void**) &dev_c, size);


<!--T:13-->
  a=2; b=7;
a=2; b=7;
  //  copy inputs to device
//  copy inputs to device
  cudaMemcpy (dev_a, &a, size, cudaMemcpyHostToDevice);
cudaMemcpy (dev_a, &a, size, cudaMemcpyHostToDevice);
  cudaMemcpy (dev_b, &b, size, cudaMemcpyHostToDevice);
cudaMemcpy (dev_b, &b, size, cudaMemcpyHostToDevice);


<!--T:14-->
  // launch add() kernel on GPU, passing parameters
// launch add() kernel on GPU, passing parameters
  add <<< 1, 1 >>> (dev_a, dev_b, dev_c);
add <<< 1, 1 >>> (dev_a, dev_b, dev_c);


<!--T:15-->
  // copy device result back to host
// copy device result back to host
  cudaMemcpy (&c, dev_c, size, cudaMemcpyDeviceToHost);
cudaMemcpy (&c, dev_c, size, cudaMemcpyDeviceToHost);


<!--T:16-->
  cudaFree ( dev_a ); cudaFree ( dev_b ); cudaFree ( dev_c );
cudaFree ( dev_a ); cudaFree ( dev_b ); cudaFree ( dev_c );  
}
}
</syntaxhighlight>
</syntaxhighlight>
cc_staff
1,486

edits

Navigation menu