cc_staff
1,486
edits
No edit summary |
|||
Line 3: | Line 3: | ||
[[Category:Software]] | [[Category:Software]] | ||
=Quick start guide= | =Quick start guide= | ||
{{File | |||
|name=add.cu | |||
|lang="c++" | |||
|contents= | |||
#include <iostream> | |||
// Kernel: reads the integer at a and the integer at b, and writes their sum
// to the integer at c. No thread or block index is used, so every thread
// that executes the kernel performs this same single addition.
__global__ void add (int *a, int *b, int *c){
  const int sum = a[0] + b[0];
  c[0] = sum;
}
// Prints a diagnostic on stderr when a CUDA runtime call did not succeed.
// what: short label naming the call that produced status.
// Returns true only when status is cudaSuccess.
static bool cudaOk (cudaError_t status, const char *what){
  if (status == cudaSuccess) return true;
  std::cerr<<"CUDA error ("<<what<<"): "<<cudaGetErrorString(status)<<std::endl;
  return false;
}
// Adds 2 and 7 on the GPU with the add() kernel and prints "2+7=9".
// Every CUDA call is checked: on the first failure (for example when no GPU
// is present) the remaining steps are skipped, the error is reported on
// stderr, no result is printed, and the program exits with status 1.
int main(void){
  int a = 2, b = 7;
  int c = 0;  // initialised so that c never holds an indeterminate value
  int *dev_a = 0, *dev_b = 0, *dev_c = 0;
  const size_t size = sizeof(int);
  // allocate device copies of a, b, c
  bool ok = cudaOk (cudaMalloc ( (void**) &dev_a, size), "cudaMalloc dev_a");
  ok = ok && cudaOk (cudaMalloc ( (void**) &dev_b, size), "cudaMalloc dev_b");
  ok = ok && cudaOk (cudaMalloc ( (void**) &dev_c, size), "cudaMalloc dev_c");
  // copy inputs to device
  ok = ok && cudaOk (cudaMemcpy (dev_a, &a, size, cudaMemcpyHostToDevice), "copy a to device");
  ok = ok && cudaOk (cudaMemcpy (dev_b, &b, size, cudaMemcpyHostToDevice), "copy b to device");
  if (ok) {
    // launch add() kernel on GPU, passing parameters
    add <<< 1, 1 >>> (dev_a, dev_b, dev_c);
    // a kernel launch returns nothing; launch failures are read back here
    ok = cudaOk (cudaGetLastError (), "add kernel launch");
  }
  // copy device result back to host; this blocking copy also reports
  // errors raised while the kernel was running
  ok = ok && cudaOk (cudaMemcpy (&c, dev_c, size, cudaMemcpyDeviceToHost), "copy c to host");
  if (ok) {
    std::cout<<a<<"+"<<b<<"="<<c<<std::endl;
  }
  // pointers that were never allocated are still 0, which cudaFree accepts
  cudaFree ( dev_a ); cudaFree ( dev_b ); cudaFree ( dev_c );
  return ok ? 0 : 1;
}
}} | |||
Save the above file as <code>add.cu</code>. '''The <code>cu</code> file extension is important!''' To build it, issue the following commands: | |||
<source lang="console"> | <source lang="console"> | ||
$ module purge | $ module purge | ||
$ module load cuda | $ module load cuda | ||
$ nvcc add.cu -o add | |||
</source> | </source> | ||
Create a GPU job script called <code>gpu_job.sh</code>: | |||
{{File | |||
|name=gpu_job.sh | |||
|lang="sh" | |||
|contents= | |||
#!/bin/bash
# Slurm batch script that runs the compiled add program on one GPU.
# Account the job is charged to (def-someuser is a placeholder).
#SBATCH --account=def-someuser
# Number of GPUs (per node).
#SBATCH --gres=gpu:1
# Memory (per node).
#SBATCH --mem=400M
# Time limit, format DD-HH:MM.
#SBATCH --time=0-00:10
# Run your program.
./add
}} | |||
Then submit your GPU job to the scheduler with the following command: | |||
<source lang="console"> | |||
$ sbatch gpu_job.sh | |||
</source> | |||
Once your job has finished you should see an output file like: | |||
<source lang="console"> | |||
$ cat slurm-3127733.out | |||
2+7=9 | |||
</source> | |||
If you run this where a GPU isn't present, the CUDA calls fail; a program that does not check their return values keeps going and might print something like <code>2+7=0</code>. | |||
=Introduction= <!--T:1--> | =Introduction= <!--T:1--> | ||
This tutorial introduces the graphics processing unit (GPU) as a massively parallel computing device; the CUDA parallel programming language; and some of the CUDA numerical libraries for high performance computing. | This tutorial introduces the graphics processing unit (GPU) as a massively parallel computing device; the CUDA parallel programming language; and some of the CUDA numerical libraries for high performance computing. |