01-20 19:43
Notice
Recent Posts
Recent Comments
관리 메뉴

Scientific Computing & Data Science

[OpenCV] cuda::DeviceInfo 클래스를 이용한 CUDA Device 정보 출력하기 본문

Programming/OpenCV

[OpenCV] cuda::DeviceInfo 클래스를 이용한 CUDA Device 정보 출력하기

cinema4dr12 2015. 8. 30. 23:46

OpenCV의 cuda::DeviceInfo 클래스를 이용하여 현재 사용 중인 CUDA Device의 정보를 출력하는 소스 코드는 다음과 같습니다:

Example Code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#include "opencv2/core/cuda.hpp"
#include <iostream>
 
using namespace std;
using namespace cv;
 
/* @ function main */
int main( int argc, char *argv[] )
{
    // CUDA device count
    int nDeviceCount = 0;
    nDeviceCount = cuda::getCudaEnabledDeviceCount();
 
    // CUDA device index
    int nCurrentDeviceIndex = 0;
    nCurrentDeviceIndex = cuda::getDevice();
 
    // CUDA device info
    cuda::DeviceInfo deviceInfo = cuda::DeviceInfo( nCurrentDeviceIndex );
 
    // CUDA device name
    const char* deviceName = deviceInfo.name();
 
    // global memory available on device in bytes
    const size_t deviceTotalGlobMem = deviceInfo.totalGlobalMem();
 
    // shared memory available per block in bytes
    const size_t deviceSharedMemPerBlock = deviceInfo.sharedMemPerBlock();
 
    // 32-bit registers available per block
        const int nDeviceegsPerBlock = deviceInfo.regsPerBlock();
 
    // warp size in threads
    const int nDeviceWarpSize = deviceInfo.warpSize();
 
    // maximum pitch in bytes allowed by memory copies
    const size_t deviceMemPitch = deviceInfo.memPitch();
 
    // maximum threads per block
    int nMaxThreadPerBlock = deviceInfo.maxThreadsPerBlock();
 
    // maximum size of each dimension of a block
    const Vec3i deviceMaxThreadsDim = deviceInfo.maxThreadsDim();
 
    // maximum size of each dimension of a grid
    const Vec3i deviceMaxGridSize = deviceInfo.maxGridSize();
 
    // clock frequency in kilohertz
    const int nDeviceClockRate = deviceInfo.clockRate();
 
    // constant memory available on device in bytes
        const size_t deviceTotalConstMem = deviceInfo.totalConstMem();
 
    // major compute capability
        const int nDeiveMajorVersion = deviceInfo.majorVersion();
 
    // minor compute capability
        const int nDeiveMinorVersion = deviceInfo.minorVersion();
 
    // pitch alignment requirement for texture references bound to pitched memory
    const size_t devicetexturePitchAlignment = deviceInfo.texturePitchAlignment();
 
        // number of multiprocessors on device
    const int nDeviceMultiProcessorCount = deviceInfo.multiProcessorCount();
 
        // specified whether there is a run time limit on kernels
        const bool bKernelExecTimeoutEnabled = deviceInfo.kernelExecTimeoutEnabled();
    char* tmp1 = bKernelExecTimeoutEnabled? "true" : "false";
 
        // device is integrated as opposed to discrete
        const bool bIDeviceIntegrated = deviceInfo.integrated();
    char* tmp2 = bIDeviceIntegrated? "true" : "false";
 
        // device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
        const bool bCanMapHostMemory = deviceInfo.canMapHostMemory();
    char* tmp3 = bCanMapHostMemory? "true" : "false";
 
    // maximum 1D texture size
        const int nMaxTex1D =  deviceInfo.maxTexture1D();
 
        // maximum 1D mipmapped texture size
    const int nMaxTexture1DMipmap = deviceInfo.maxTexture1DMipmap();
 
        // maximum size for 1D textures bound to linear memory
    const int nMaxTexture1DLinear = deviceInfo.maxTexture1DLinear();
 
        // maximum 2D texture dimensions
    const Vec2i deviceMaxTex2D = deviceInfo.maxTexture2D();
 
        // maximum 2D mipmapped texture dimensions
    const Vec2i deviceMaxTex2DMipmap = deviceInfo.maxTexture2DMipmap();
 
        // maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
    const Vec3i deviceMaxTex2DLinear = deviceInfo.maxTexture2DLinear();
 
        // maximum 2D texture dimensions if texture gather operations have to be performed
    const Vec2i deviceMaxTex2DGather = deviceInfo.maxTexture2DGather();
 
        // maximum 3D texture dimensions
    const Vec3i deviceMaxTex3D = deviceInfo.maxTexture3D();
 
        // maximum Cubemap texture dimensions
    const int deviceMaximumTexCubemap = deviceInfo.maxTextureCubemap();
 
        // maximum 1D layered texture dimensions
    const Vec2i deviceMaxTex1DLayered = deviceInfo.maxTexture1DLayered();
 
        // maximum 2D layered texture dimensions
    const Vec3i deviceMaxTex2DLayered = deviceInfo.maxTexture2DLayered();
 
        // maximum Cubemap layered texture dimensions
    const Vec2i deviceMaxTexCubemapLayered = deviceInfo.maxTextureCubemapLayered();
 
    // maximum 1D surface size
    const int deviceMaxSurface1D = deviceInfo.maxSurface1D();
 
        // maximum 2D surface dimensions
    const Vec2i deviceMaxSurface2D = deviceInfo.maxSurface2D();
 
    // maximum 3D surface dimensions
        const Vec3i deviceMaxSurface3D = deviceInfo.maxSurface3D();
 
        // maximum 1D layered surface dimensions
        const Vec2i deviceMaxSurf1DLayered = deviceInfo.maxSurface1DLayered();
 
        // maximum 2D layered surface dimensions
    const Vec3i deviceMaxSurf2DLayered = deviceInfo.maxSurface2DLayered();
 
        // maximum Cubemap surface dimensions
    const int deviceMaxSurfCubemap = deviceInfo.maxSurfaceCubemap();
 
        // maximum Cubemap layered surface dimensions
    const Vec2i deviceMaxSurfCubemapLayered = deviceInfo.maxSurfaceCubemapLayered();
 
        // alignment requirements for surfaces
    const size_t deviceSurfAlignment = deviceInfo.surfaceAlignment();
 
 
        // device can possibly execute multiple kernels concurrently
        const bool bConKernels = deviceInfo.concurrentKernels();
    char* tmp4 = bConKernels? "true" : "false";
 
        // device has ECC support enabled
        const bool bECCEnabled = deviceInfo.ECCEnabled();
    char* tmp5 = bECCEnabled? "true" : "false";
 
        // PCI bus ID of the device
        const int devicePciBusId = deviceInfo.pciBusID();
 
        // PCI device ID of the device
    const int devicePciDeviceId = deviceInfo.pciDeviceID();
 
        // PCI domain ID of the device
        const int devicePciDomainId = deviceInfo.pciDomainID();
 
        // true if device is a Tesla device using TCC driver, false otherwise
        const bool deviceTccDriver = deviceInfo.tccDriver();
 
        // number of asynchronous engines
    const int deviceAsyncEngineCount = deviceInfo.asyncEngineCount();
 
        // device shares a unified address space with the host
    const bool bUnifiedAddressing = deviceInfo.unifiedAddressing();
    char* tmp6 = bUnifiedAddressing? "true" : "false";
 
        // peak memory clock frequency in kilohertz
    const int deviceMemClockRate = deviceInfo.memoryClockRate();
 
        // global memory bus width in bits
        const int deviceMemBusWidth = deviceInfo.memoryBusWidth();
 
        // size of L2 cache in bytes
        const int deviceL2CacheSize = deviceInfo.l2CacheSize();
 
        // maximum resident threads per multiprocessor
        const int deviceMaxThreadsPerMultiProcessor = deviceInfo.maxThreadsPerMultiProcessor();
 
        // checks whether the CUDA module can be run on the given device
    const bool bIsCompatible = deviceInfo.isCompatible();
    char* tmp7 = bIsCompatible? "true" : "false";
 
    // print the results
    cout << "CUDA Enabled Device Count: " << nDeviceCount << endl;
    cout << "CUDA Deivce Index: " << nCurrentDeviceIndex << endl;
    cout << "Device Name: " << deviceName << endl;
    cout << "Device Toal Global Memory: " << deviceTotalGlobMem << endl;
    cout << "Device Shared Memory Per Block: " << deviceSharedMemPerBlock << endl;
    cout << "Device Registry Per BlocK: " << nDeviceegsPerBlock << endl;
    cout << "Device Warp Size: " << nDeviceWarpSize << endl;
    cout << "Device Memory Pitch: " << deviceMemPitch << endl;
    cout << "Maximum Threads Per Block: " << nMaxThreadPerBlock << endl;
    cout << "Maximum Threads Dimension: " << deviceMaxThreadsDim << endl;
    cout << "Maximum Grid Size: " << deviceMaxGridSize << endl;
    cout << "Device Clock Rate: " << nDeviceClockRate << endl;
    cout << "Device Total Constant Memory: " << deviceTotalConstMem << endl;
    cout << "Device Major Version: " << nDeiveMajorVersion << endl;
    cout << "Device Minor Version: " << nDeiveMinorVersion << endl;
    cout << "Device Texture Pitch Alignment: " << devicetexturePitchAlignment << endl;
    cout << "Device Multi Processor Count: " << nDeviceMultiProcessorCount << endl;
    cout << "Device Kernel Execution Timeout Enabled?: " << tmp1 << endl;
    cout << "Device Integrated?: " << tmp2 << endl;
    cout << "Device Can Map Host Memory?: " << tmp3 << endl;
    cout << "Device Maximum Texture 1D: " << nMaxTex1D << endl;
    cout << "Device maximum Texture 1D Mipmap: " << nMaxTexture1DMipmap << endl;
    cout << "Device Maximum Texture 1D Linear: " << nMaxTexture1DLinear << endl;
    cout << "Device Maximum Texture 2D: " << deviceMaxTex2D << endl;
    cout << "Device Maximum Texture 2D Mipmap: " << deviceMaxTex2DMipmap << endl;
    cout << "Device Maximum Texture 2D Linear: " << deviceMaxTex2DLinear << endl;
    cout << "Device Maximum Texutre 2D Gather: " << deviceMaxTex2DGather << endl;
    cout << "Device Maximum Texture 3D: " << deviceMaxTex3D << endl;
    cout << "Device Maximum Texture Cubemap: " << deviceMaximumTexCubemap << endl;
    cout << "Device Maximum Texture 1D Layered: " << deviceMaxTex1DLayered << endl;
    cout << "Device Maximum Texture 2D Layered: " << deviceMaxTex2DLayered << endl;
    cout << "Device Maximum Texture Cubemap Layered: " << deviceMaxTexCubemapLayered << endl;
    cout << "Device Maximum Surface 1D: " << deviceMaxSurface1D << endl;
    cout << "Device Maximum Surface 2D: " << deviceMaxSurface2D << endl;
    cout << "Device Maximum Surface 3D: " << deviceMaxSurface3D << endl;
    cout << "Device Maximum Surface 1D Layered: " << deviceMaxSurf1DLayered << endl;
    cout << "Device Maximum Surface 2D Layered: " << deviceMaxSurf2DLayered << endl;
    cout << "Device Maximum Surface Cubemap: " << deviceMaxSurfCubemap << endl;
    cout << "Device Maximum Surface Cubemap Layered: " << deviceMaxSurfCubemapLayered << endl;
    cout << "Device Surfce Alignment: " << deviceSurfAlignment << endl;
    cout << "Concurrent Kernels?: "  << tmp4 << endl;
    cout << "ECC Enabled?: " << tmp5 << endl;
    cout << "Device PCI Bus ID: " << devicePciBusId << endl;
    cout << "Device PCI Device ID: " << devicePciDeviceId << endl;
    cout << "Device PCI Domain ID: " << devicePciDomainId << endl;
    cout << "Device TCC Driver: " << deviceTccDriver << endl;
    cout << "Device Async Engine Count: " << deviceAsyncEngineCount << endl;
    cout << "Device Unified Addressing?: " << tmp6 << endl;
    cout << "Device Memory Clock Rate: " << deviceMemClockRate << endl;
    cout << "Device Memory Bus Width: " << deviceMemBusWidth << endl;
    cout << "Device L2 Cache Size: " << deviceL2CacheSize << endl;
    cout << "Device Maximum Threads Per Multi Processor: " << deviceMaxThreadsPerMultiProcessor << endl;
    cout << "Is Compatible?: " << tmp7 << endl;
     
    return 0;
}
cs

Results

저의 PC에서 실행한 결과는 다음과 같습니다. 각자의 CUDA를 지원하는 그래픽스 카드 및 실행환경에 따라 다르게 나올 것입니다.


CUDA Enabled Device Count: 1
CUDA Deivce Index: 0
Device Name: GeForce GTX 750 Ti
Device Toal Global Memory: 2147483648
Device Shared Memory Per Block: 49152
Device Registry Per BlocK: 65536
Device Warp Size: 32
Device Memory Pitch: 2147483647
Maximum Threads Per Block: 1024
Maximum Threads Dimension: [1024, 1024, 64]
Maximum Grid Size: [2147483647, 65535, 65535]
Device Clock Rate: 1110500
Device Total Constant Memory: 65536
Device Major Version: 5
Device Minor Version: 0
Device Texture Pitch Alignment: 32
Device Multi Processor Count: 5
Device Kernel Execution Timeout Enabled?: true
Device Integrated?: false
Device Can Map Host Memory?: true
Device Maximum Texture 1D: 65536
Device maximum Texture 1D Mipmap: 16384
Device Maximum Texture 1D Linear: 134217728
Device Maximum Texture 2D: [65536, 65536]
Device Maximum Texture 2D Mipmap: [16384, 16384]
Device Maximum Texture 2D Linear: [65000, 65000, 1048544]
Device Maximum Texutre 2D Gather: [16384, 16384]
Device Maximum Texture 3D: [4096, 4096, 4096]
Device Maximum Texture Cubemap: 16384
Device Maximum Texture 1D Layered: [16384, 2048]
Device Maximum Texture 2D Layered: [16384, 16384, 2048]
Device Maximum Texture Cubemap Layered: [16384, 2046]
Device Maximum Surface 1D: 65536
Device Maximum Surface 2D: [65536, 32768]
Device Maximum Surface 3D: [65536, 32768, 2048]
Device Maximum Surface 1D Layered: [65536, 2048]
Device Maximum Surface 2D Layered: [65536, 32768, 2048]
Device Maximum Surface Cubemap: 32768
Device Maximum Surface Cubemap Layered: [32768, 2046]
Device Surfce Alignment: 512
Concurrent Kernels?: true
ECC Enabled?: false
Device PCI Bus ID: 1
Device PCI Device ID: 0
Device PCI Domain ID: 0
Device TCC Driver: 0
Device Async Engine Count: 1
Device Unified Addressing?: true
Device Memory Clock Rate: 2700000
Device Memory Bus Width: 128
Device L2 Cache Size: 2097152
Device Maximum Threads Per Multi Processor: 2048
Is Compatible?: true


Comments