Bureaucrats, cc_docs_admin, cc_staff
337
edits
No edit summary |
No edit summary |
||
Line 117: | Line 117: | ||
__global__ void dot(int *a, int *b, int *c){  // first step only: c is not written by this version
    const int tid = threadIdx.x; int temp = a[tid] * b[tid];  // this thread's product; temp is a per-thread local
}
</syntaxhighlight> | |||
After each thread has computed its portion, we need to add everything together, so each thread has to share its data. However, the problem is that each thread's copy of temp is private. This can be resolved with the use of shared memory. Below is the kernel with the modifications to account for the shared memory usage:
<syntaxhighlight lang="cpp" line highlight="1,4"> | |||
#define N 512 | |||
__global__ void dot(int *a, int *b, int *c){  // dot product: *c = sum of a[i]*b[i] for i in [0, N)
    __shared__ int temp[N];  // block-wide scratch: one product slot per thread
    temp[threadIdx.x] = a[threadIdx.x]*b[threadIdx.x];
    // Written for a block of N threads: each thread fills temp[threadIdx.x]
    // and thread 0 then reads all N slots, so every slot must be populated.
    __syncthreads();  // all products must be stored before thread 0 reads temp
    if(threadIdx.x==0){
        // The accumulator must start at 0: an uninitialized local holds an
        // indeterminate value, which would make the result garbage.
        int sum = 0;
        for(int i=0;i<N;i++) sum += temp[i];
        *c = sum;  // single writer publishes the final result
    }
}
</syntaxhighlight> | </syntaxhighlight> |