01-18 12:00
Notice
Recent Posts
Recent Comments
관리 메뉴

Scientific Computing & Data Science

[CUDA] Vector Add 예제 2. 본문

Scientific Computing/NVIDIA CUDA

[CUDA] Vector Add 예제 2.

cinema4dr12 2015. 1. 2. 20:28

[VectorAdd.cu]

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>

#define arraySize 1000

// Element-wise vector addition on the device: c[i] = a[i] + b[i] for every
// i in [0, arraySize).
//
// Parameters:
//   c - output array, written at indices [0, arraySize)
//   a - first input array, read at indices [0, arraySize)
//   b - second input array, read at indices [0, arraySize)
//
// Launch layout: any 1D grid of 1D blocks. Each thread starts at its global
// index and advances by the total number of launched threads, so the result
// is complete whether the launch has more, fewer, or exactly arraySize
// threads. Uses no shared memory.
__global__ void addKernel( int *c, const int *a, const int *b )
{
	// Total number of threads in the 1D grid; widened before multiplying so
	// the product cannot wrap for large grids.
	const size_t stride = (size_t)gridDim.x * blockDim.x;

	// Start at this thread's global index (block offset + index in block),
	// not just threadIdx.x, so multi-block launches cover distinct elements.
	for( size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x ;
	     i < arraySize ;
	     i += stride )
	{
		c[i] = a[i] + b[i];
	}
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded( cudaError_t status, const char *what )
{
	if( status == cudaSuccess )
		return true;

	fprintf( stderr, "%s failed: %s\n", what, cudaGetErrorString( status ) );
	return false;
}

// Adds two arraySize-element integer vectors on the GPU and prints each sum.
//
// Steps: fill 'a' and 'b' on the host, allocate three device buffers, copy
// the inputs to the device, run addKernel, copy the result back, print it,
// and release the device buffers.
//
// Returns 0 when every CUDA call succeeded and the results were printed;
// returns 1 (after printing a diagnostic to stderr) if any CUDA call failed.
int main()
{
	int a[arraySize];
	int b[arraySize];
	int c[arraySize];

	// Device pointers start out null so that cudaFree() at the end is a
	// harmless no-op for any buffer whose allocation never happened.
	int *dev_a = 0;
	int *dev_b = 0;
	int *dev_c = 0;

	const size_t bytes = arraySize * sizeof(int);

	// fill the arrays 'a' and 'b' on the CPU
	for( int i = 0 ; i < arraySize ; i++ ) {
		a[i] = i;
		b[i] = i;
	}

	// Allocate GPU buffers for three vectors (two input, one output) and
	// copy the inputs 'a' and 'b' to the GPU. The && chain stops at the
	// first failing call so later calls never run on a bad state.
	bool ok =
		cudaSucceeded( cudaMalloc((void**)&dev_c, bytes), "cudaMalloc(dev_c)" ) &&
		cudaSucceeded( cudaMalloc((void**)&dev_a, bytes), "cudaMalloc(dev_a)" ) &&
		cudaSucceeded( cudaMalloc((void**)&dev_b, bytes), "cudaMalloc(dev_b)" ) &&
		cudaSucceeded( cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice),
		               "cudaMemcpy(a -> dev_a)" ) &&
		cudaSucceeded( cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice),
		               "cudaMemcpy(b -> dev_b)" );

	if( ok ) {
		// Add vectors in parallel: one block of arraySize threads, one
		// thread per element.
		addKernel<<<1, arraySize>>>(dev_c, dev_a, dev_b);

		ok =
			// A kernel launch returns nothing; a bad launch configuration
			// (e.g. too many threads per block) only shows up here.
			cudaSucceeded( cudaGetLastError(), "addKernel launch" ) &&
			// Errors raised while the kernel runs surface at the next
			// synchronizing call.
			cudaSucceeded( cudaDeviceSynchronize(), "addKernel execution" ) &&
			// copy the array 'c' back from the GPU to the CPU
			cudaSucceeded( cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost),
			               "cudaMemcpy(dev_c -> c)" );
	}

	// display the results (only when 'c' was actually filled by the GPU)
	if( ok ) {
		for( int i = 0 ; i < arraySize ; i++ ) {
			printf( "%d + %d = %d\n", a[i], b[i], c[i] );
		}
	}

	// free the memory allocated on the GPU, on success and failure alike
	cudaFree(dev_c);
	cudaFree(dev_a);
	cudaFree(dev_b);

	return ok ? 0 : 1;
}


Comments