写在前面
查看硬件属性
Tesla T4
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
/*
* Fetches basic information on the first device in the current CUDA platform,
* including number of SMs, bytes of constant memory, bytes of shared memory per
* block, etc.
*/
/*
 * CUDA_CHECK(call): evaluates a CUDA runtime call exactly once. If the
 * returned cudaError_t is not cudaSuccess, prints the source file, line and
 * the cudaGetErrorString() text to stderr, then terminates the process with
 * a failure exit status.
 *
 * The argument is parenthesized and the temporary uses a macro-private name,
 * so an arbitrary expression argument, or a caller variable that happens to
 * be named `err`, still expands correctly (the old `cudaError_t err = call;`
 * would self-initialize for CUDA_CHECK(err)).
 */
#define CUDA_CHECK(call)                                                  \
    do {                                                                  \
        const cudaError_t cuda_check_err_ = (call);                       \
        if (cuda_check_err_ != cudaSuccess) {                             \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
                    cudaGetErrorString(cuda_check_err_));                 \
            exit(EXIT_FAILURE);                                           \
        }                                                                 \
    } while (0)
/*
 * Queries the properties of CUDA device 0 and prints a short summary to
 * stdout: multiprocessor count, constant and per-block shared memory (in KB),
 * registers per block, warp size, and the per-block / per-multiprocessor
 * thread limits.
 *
 * Returns EXIT_SUCCESS on success. If the property query fails, CUDA_CHECK
 * reports the error on stderr and exits the process.
 */
int main(int argc, char *argv[])
{
    /* Command-line arguments are not used by this tool. */
    (void)argc;
    (void)argv;

    int iDev = 0;
    cudaDeviceProp iProp;
    CUDA_CHECK(cudaGetDeviceProperties(&iProp, iDev));

    /*
     * Derive the warp limit from the reported warp size instead of a
     * hard-coded 32, so the figure stays consistent with the "Warp size"
     * line printed below. The guard only avoids a division by zero should
     * the runtime ever report a non-positive warp size.
     */
    const int maxWarpsPerSM =
        (iProp.warpSize > 0)
            ? iProp.maxThreadsPerMultiProcessor / iProp.warpSize
            : 0;

    printf("Device %d: %s\n", iDev, iProp.name);
    printf(" Number of multiprocessors: %d\n",
           iProp.multiProcessorCount);
    /* Memory sizes are reported in bytes; divide by 1024.0 to print KB. */
    printf(" Total amount of constant memory: %4.2f KB\n",
           iProp.totalConstMem / 1024.0);
    printf(" Total amount of shared memory per block: %4.2f KB\n",
           iProp.sharedMemPerBlock / 1024.0);
    printf(" Total number of registers available per block: %d\n",
           iProp.regsPerBlock);
    printf(" Warp size: %d\n",
           iProp.warpSize);
    printf(" Maximum number of threads per block: %d\n",
           iProp.maxThreadsPerBlock);
    printf(" Maximum number of threads per multiprocessor: %d\n",
           iProp.maxThreadsPerMultiProcessor);
    printf(" Maximum number of warps per multiprocessor: %d\n",
           maxWarpsPerSM);

    return EXIT_SUCCESS;
}
基本信息
Device 0: Tesla T4
Number of multiprocessors: 40
Total amount of constant memory: 64.00 KB
Total amount of shared memory per block: 48.00 KB
Total number of registers available per block: 65536
Warp size: 32
Maximum number of threads per block: 1024
Maximum number of threads per multiprocessor: 1024
Maximum number of warps per multiprocessor: 32
Device 0: NVIDIA A10
Number of multiprocessors: 72
Total amount of constant memory: 64.00 KB
Total amount of shared memory per block: 48.00 KB
Total number of registers available per block: 65536
Warp size: 32
Maximum number of threads per block: 1024
Maximum number of threads per multiprocessor: 1536
Maximum number of warps per multiprocessor: 48
Device 0: NVIDIA L20
Number of multiprocessors: 92
Total amount of constant memory: 64.00 KB
Total amount of shared memory per block: 48.00 KB
Total number of registers available per block: 65536
Warp size: 32
Maximum number of threads per block: 1024
Maximum number of threads per multiprocessor: 1536
Maximum number of warps per multiprocessor: 48