-
Notifications
You must be signed in to change notification settings - Fork 0
/
improved-cuda-test-script.py
124 lines (99 loc) · 4.21 KB
/
improved-cuda-test-script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import torch
import time
import torch.nn as nn
def test_cuda_availability():
    """
    Report whether CUDA is usable and describe the visible GPUs.

    When CUDA is available, prints the number of GPUs, the name of each
    device, the index of the current device and that device's compute
    capability. Otherwise prints a hint to check the CUDA installation.
    """
    if not torch.cuda.is_available():
        print("CUDA is not available. Please check your CUDA installation.")
        return

    print("CUDA is available!")

    gpu_count = torch.cuda.device_count()
    print(f"Number of available GPUs: {gpu_count}")
    for index in range(gpu_count):
        gpu_name = torch.cuda.get_device_name(index)
        print(f"Device {index}: {gpu_name}")

    active = torch.cuda.current_device()
    print(f"Current device: {active}")
    capability = torch.cuda.get_device_capability(active)
    print(f"CUDA capability: {capability}")
def test_matmul(size=10000, iterations=10, device='cuda'):
    """
    Test matrix multiplication performance.

    Multiplies two random ``size`` x ``size`` matrices ``iterations`` times
    and prints the total elapsed time. One untimed multiplication is run
    first so that one-time start-up work is not attributed to the timed
    loop. On a CUDA device the device is synchronized before the timer
    starts and before it stops, because CUDA work is queued asynchronously.

    Args:
        size (int): Size of the square matrices
        iterations (int): Number of times to repeat the multiplication
        device (str or torch.device): Device to run on. Defaults to 'cuda'.
            If a CUDA device is requested but CUDA is not available, the
            test prints a message and returns without running.
    """
    target = torch.device(device)
    on_gpu = target.type == 'cuda'
    if on_gpu and not torch.cuda.is_available():
        print("\nSkipping CUDA matrix multiplication test: CUDA is not available.")
        return

    # With the default device these labels reproduce the original messages.
    backend = target.type.upper()
    location = "GPU" if on_gpu else backend

    def wait_for_device():
        # Block until every queued CUDA kernel has finished; no-op elsewhere.
        if on_gpu:
            torch.cuda.synchronize(target)

    print(
        f"\nTesting {backend} matrix multiplication "
        f"(size: {size}x{size}, iterations: {iterations})..."
    )
    lhs = torch.randn(size, size, device=target)
    rhs = torch.randn(size, size, device=target)

    # Warm-up: keeps input generation and first-call setup out of the timing.
    torch.matmul(lhs, rhs)
    wait_for_device()

    # perf_counter is monotonic and high resolution, unlike time.time().
    start_time = time.perf_counter()
    for _ in range(iterations):
        torch.matmul(lhs, rhs)
    wait_for_device()  # Ensure all queued operations are completed
    end_time = time.perf_counter()
    print(f"Matrix multiplication on {location} took {end_time - start_time:.6f} seconds")
def test_basic_operations(size=10000000, iterations=100, device='cuda'):
    """
    Test basic element-wise operations.

    Repeats an addition, a multiplication, a sine and an exponential over
    two random vectors of length ``size`` and prints the total elapsed time.
    One untimed round is run first so that one-time start-up work is not
    attributed to the timed loop. On a CUDA device the device is
    synchronized before the timer starts and before it stops, because CUDA
    work is queued asynchronously.

    Args:
        size (int): Size of the vectors
        iterations (int): Number of times to repeat the operations
        device (str or torch.device): Device to run on. Defaults to 'cuda'.
            If a CUDA device is requested but CUDA is not available, the
            test prints a message and returns without running.
    """
    target = torch.device(device)
    on_gpu = target.type == 'cuda'
    if on_gpu and not torch.cuda.is_available():
        print("\nSkipping basic CUDA operations test: CUDA is not available.")
        return

    # With the default device these labels reproduce the original messages.
    backend = target.type.upper()
    location = "GPU" if on_gpu else backend

    def wait_for_device():
        # Block until every queued CUDA kernel has finished; no-op elsewhere.
        if on_gpu:
            torch.cuda.synchronize(target)

    def run_once():
        # Results are discarded on purpose: only the execution time matters.
        x + y
        x * y
        torch.sin(x)
        torch.exp(y)

    print(
        f"\nTesting basic {backend} operations "
        f"(vector size: {size}, iterations: {iterations})..."
    )
    x = torch.randn(size, device=target)
    y = torch.randn(size, device=target)

    # Warm-up: keeps input generation and first-call setup out of the timing.
    run_once()
    wait_for_device()

    # perf_counter is monotonic and high resolution, unlike time.time().
    start_time = time.perf_counter()
    for _ in range(iterations):
        run_once()
    wait_for_device()
    end_time = time.perf_counter()
    print(f"Basic operations on {location} took {end_time - start_time:.6f} seconds")
def test_convolution(size=1024, channels=64, kernel_size=3, iterations=1000, device='cuda'):
    """
    Test 2D convolution performance.

    Runs a ``Conv2d`` layer (same number of input and output channels,
    padding of ``kernel_size // 2``) over one random image ``iterations``
    times and prints the total elapsed time. One untimed forward pass is run
    first so that one-time start-up work is not attributed to the timed
    loop. On a CUDA device the device is synchronized before the timer
    starts and before it stops, because CUDA work is queued asynchronously.
    Gradient recording is disabled since only the forward pass is measured.

    Args:
        size (int): Size of the square input
        channels (int): Number of input and output channels
        kernel_size (int): Size of the convolution kernel
        iterations (int): Number of times to repeat the convolution
        device (str or torch.device): Device to run on. Defaults to 'cuda'.
            If a CUDA device is requested but CUDA is not available, the
            test prints a message and returns without running.
    """
    target = torch.device(device)
    on_gpu = target.type == 'cuda'
    if on_gpu and not torch.cuda.is_available():
        print("\nSkipping CUDA 2D convolution test: CUDA is not available.")
        return

    # With the default device these labels reproduce the original messages.
    backend = target.type.upper()
    location = "GPU" if on_gpu else backend

    def wait_for_device():
        # Block until every queued CUDA kernel has finished; no-op elsewhere.
        if on_gpu:
            torch.cuda.synchronize(target)

    print(
        f"\nTesting {backend} 2D convolution (size: {size}x{size}, "
        f"channels: {channels}, kernel: {kernel_size}x{kernel_size}, "
        f"iterations: {iterations})..."
    )
    input_tensor = torch.randn(1, channels, size, size, device=target)
    conv_layer = nn.Conv2d(
        channels, channels, kernel_size, padding=kernel_size // 2
    ).to(target)

    # Forward-only benchmark: skip autograd bookkeeping for the outputs.
    with torch.no_grad():
        # Warm-up: keeps input generation and first-call setup out of the timing.
        conv_layer(input_tensor)
        wait_for_device()

        # perf_counter is monotonic and high resolution, unlike time.time().
        start_time = time.perf_counter()
        for _ in range(iterations):
            conv_layer(input_tensor)
        wait_for_device()
        end_time = time.perf_counter()
    print(f"2D convolution on {location} took {end_time - start_time:.6f} seconds")
def test_lstm(seq_length=1000, input_size=512, hidden_size=512, num_layers=2, batch_size=64, iterations=100, device='cuda'):
    """
    Test LSTM performance.

    Runs the forward pass of an ``nn.LSTM`` over one random input of shape
    ``(seq_length, batch_size, input_size)`` ``iterations`` times and prints
    the total elapsed time. One untimed forward pass is run first so that
    one-time start-up work is not attributed to the timed loop. On a CUDA
    device the device is synchronized before the timer starts and before it
    stops, because CUDA work is queued asynchronously. Gradient recording is
    disabled since only the forward pass is measured.

    Args:
        seq_length (int): Length of the input sequence
        input_size (int): Size of input features
        hidden_size (int): Size of hidden state
        num_layers (int): Number of LSTM layers
        batch_size (int): Batch size
        iterations (int): Number of times to repeat the forward pass
        device (str or torch.device): Device to run on. Defaults to 'cuda'.
            If a CUDA device is requested but CUDA is not available, the
            test prints a message and returns without running.
    """
    target = torch.device(device)
    on_gpu = target.type == 'cuda'
    if on_gpu and not torch.cuda.is_available():
        print("\nSkipping CUDA LSTM test: CUDA is not available.")
        return

    # With the default device these labels reproduce the original messages.
    backend = target.type.upper()
    location = "GPU" if on_gpu else backend

    def wait_for_device():
        # Block until every queued CUDA kernel has finished; no-op elsewhere.
        if on_gpu:
            torch.cuda.synchronize(target)

    print(
        f"\nTesting {backend} LSTM (seq_length: {seq_length}, "
        f"input_size: {input_size}, hidden_size: {hidden_size}, "
        f"layers: {num_layers}, batch_size: {batch_size}, "
        f"iterations: {iterations})..."
    )
    input_tensor = torch.randn(seq_length, batch_size, input_size, device=target)
    lstm = nn.LSTM(input_size, hidden_size, num_layers).to(target)

    # Forward-only benchmark: skip autograd bookkeeping for the outputs.
    with torch.no_grad():
        # Warm-up: keeps input generation and first-call setup out of the timing.
        lstm(input_tensor)
        wait_for_device()

        # perf_counter is monotonic and high resolution, unlike time.time().
        start_time = time.perf_counter()
        for _ in range(iterations):
            lstm(input_tensor)
        wait_for_device()
        end_time = time.perf_counter()
    print(f"LSTM forward pass on {location} took {end_time - start_time:.6f} seconds")
def main():
    """
    Run the CUDA availability report followed by every GPU benchmark.

    If CUDA is not available the benchmarks are skipped after the report,
    instead of failing on the first attempt to allocate a CUDA tensor.
    """
    torch.cuda.empty_cache()  # Clear GPU cache before starting tests
    test_cuda_availability()

    # Every benchmark below allocates on the 'cuda' device by default, so
    # there is nothing useful to run without a working CUDA runtime.
    if not torch.cuda.is_available():
        print("\nSkipping GPU benchmarks because CUDA is not available.")
        return

    test_matmul()
    test_basic_operations()
    test_convolution()
    test_lstm()
    print("\nAll tests completed.")


if __name__ == "__main__":
    main()