Bureaucrats, cc_docs_admin, cc_staff
337
edits
No edit summary |
No edit summary |
||
Line 53: | Line 53: | ||
* each GPU core (streaming processor) executes a sequential '''Thread''', where a '''Thread''' is the smallest set of instructions handled by the operating system's scheduler. | * each GPU core (streaming processor) executes a sequential '''Thread''', where a '''Thread''' is the smallest set of instructions handled by the operating system's scheduler. | ||
* all GPU cores execute the kernel in a SIMT fashion (Single Instruction Multiple Threads) | * all GPU cores execute the kernel in a SIMT fashion (Single Instruction Multiple Threads) | ||
= First CUDA C Program= | |||
<syntaxhighlight lang="cpp" line highlight="1,5,10,12"> | |||
// Device kernel: reads the two integers pointed to by a and b and writes
// their sum through c. Intended for a single-thread launch (<<<1, 1>>>),
// so no thread indexing or bounds checking is required here.
__global__ void add (int *a, int *b, int *c){
    int lhs = *a;
    int rhs = *b;
    *c = lhs + rhs;
}
// Host driver: computes 2 + 7 on the GPU via a single-thread kernel launch.
// Demonstrates the canonical sequence: device malloc -> host-to-device copy
// -> kernel launch -> device-to-host copy -> free.
// Returns 0 on success, 1 on any CUDA runtime failure.
int main(void){
    int a, b, c;
    int *dev_a, *dev_b, *dev_c;
    int size = sizeof(int);

    // Allocate device copies of a, b, c. Every CUDA API call returns a
    // cudaError_t; bail out instead of continuing with invalid pointers.
    if (cudaMalloc((void**) &dev_a, size) != cudaSuccess ||
        cudaMalloc((void**) &dev_b, size) != cudaSuccess ||
        cudaMalloc((void**) &dev_c, size) != cudaSuccess) {
        return 1;
    }

    a = 2; b = 7;

    // Copy inputs to the device.
    if (cudaMemcpy(dev_a, &a, size, cudaMemcpyHostToDevice) != cudaSuccess ||
        cudaMemcpy(dev_b, &b, size, cudaMemcpyHostToDevice) != cudaSuccess) {
        return 1;
    }

    // Launch add() on the GPU: one block of one thread.
    add <<< 1, 1 >>> (dev_a, dev_b, dev_c);

    // Kernel launches are asynchronous and do not return an error directly;
    // query the runtime so a bad launch configuration is not silently lost.
    if (cudaGetLastError() != cudaSuccess) {
        return 1;
    }

    // Copy the device result back to the host. A blocking cudaMemcpy on the
    // default stream waits for the kernel to finish, so no explicit
    // cudaDeviceSynchronize() is needed before reading c.
    if (cudaMemcpy(&c, dev_c, size, cudaMemcpyDeviceToHost) != cudaSuccess) {
        return 1;
    }

    cudaFree ( dev_a ); cudaFree ( dev_b ); cudaFree ( dev_c );
    return 0;
}
</syntaxhighlight> |