GPU Considerations
Last updated
Last updated
// Round-trip transfer benchmark: allocates X bytes on the host and on the GPU,
// copies the buffer host -> device and back device -> host, then releases both
// buffers. Intended to be run under a profiler to time the two copies.
//
// Returns 0 when every allocation and copy succeeded, 1 otherwise.
int main()
{
    // Transfer size in bytes; uncomment exactly one definition of X.
    // NOTE(review): the parenthesised pairs look like measured copy times for
    // the two transfer directions - confirm which value is HtoD and which DtoH.
    //const unsigned int X=1; //1 Bytes (2us/1us)
    //const unsigned int X=10; //10 Bytes (2us/1us)
    //const unsigned int X=100; //100 Bytes (2us/1us)
    //const unsigned int X=1000; //1k Bytes (2us/1us)
    //const unsigned int X=10000; //10k Bytes (2.7us/2us)
    //const unsigned int X=100000; //100k Bytes (10us/10us)
    //const unsigned int X=1000000; //1 Megabyte (80us/79us)
    //const unsigned int X=10000000; //10 Megabyte (1000us/900us)
    //const unsigned int X=100000000; //100 Megabyte (10000us/10000us)
    const unsigned int X=1000000000; //1000 Megabyte (106000us/103000us)
    //const unsigned int X=256000000; //256 Megabyte (27000us/26000us)
    //const unsigned int X=120*120*3; // 120x120 RGB image (43200 bytes) (7us/6us)
    const size_t bytes = X*sizeof(char);

    // Allocate memory on CPU
    char *hostArray = (char*)malloc(bytes);
    if (!hostArray)
        return 1; // host allocation failed; nothing to release yet

    // Allocate memory on GPU
    char *deviceArray = NULL;
    if (cudaMalloc((void**)&deviceArray, bytes) != cudaSuccess) {
        free(hostArray); // do not leak the host buffer on the error path
        return 1;
    }

    // Zero-fill the host buffer so the upload transfers initialized data
    memset(hostArray, 0, bytes);

    // Transfer hostArray from CPU to GPU
    cudaError_t status = cudaMemcpy(deviceArray, hostArray, bytes, cudaMemcpyHostToDevice);

    // Get hostArray from GPU to CPU (skipped when the upload already failed)
    if (status == cudaSuccess)
        status = cudaMemcpy(hostArray, deviceArray, bytes, cudaMemcpyDeviceToHost);

    // Release memory from GPU
    cudaFree(deviceArray);
    // Release memory from CPU
    free(hostArray);

    return status == cudaSuccess ? 0 : 1;
}
# Compiling
nvcc checkTransfer.cu -o checkTransfer
# Profiling
nvprof ./checkTransfer
laraujo@lindev:~/work/learningOpenCl/cudaSamples$ nvprof ./checkTransfer
==8921== NVPROF is profiling process 8921, command: ./checkTransfer
==8921== Profiling application: ./checkTransfer
==8921== Profiling result:
Time(%) Time Calls Avg Min Max Name
51.36% 109.67ms 1 109.67ms 109.67ms 109.67ms [CUDA memcpy DtoH]
48.64% 103.87ms 1 103.87ms 103.87ms 103.87ms [CUDA memcpy HtoD]
==8921== API calls:
Time(%) Time Calls Avg Min Max Name
71.69% 213.87ms 2 106.93ms 103.95ms 109.92ms cudaMemcpy
28.01% 83.557ms 1 83.557ms 83.557ms 83.557ms cudaMalloc
0.19% 580.47us 83 6.9930us 84ns 323.09us cuDeviceGetAttribute
0.09% 270.79us 1 270.79us 270.79us 270.79us cudaFree
0.01% 31.607us 1 31.607us 31.607us 31.607us cuDeviceTotalMem
0.01% 22.521us 1 22.521us 22.521us 22.521us cuDeviceGetName
0.00% 919ns 2 459ns 122ns 797ns cuDeviceGetCount
0.00% 235ns 2 117ns 84ns 151ns cuDeviceGet
nvvp ./checkTransfer