-
-
Notifications
You must be signed in to change notification settings - Fork 44
/
Copy pathcompute_noop.py
138 lines (112 loc) · 3.55 KB
/
compute_noop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
Example compute shader that does ... nothing but copy a value from one
buffer into another.
"""
import wgpu
from wgpu.utils.compute import compute_with_buffers # Convenience function
# %% Shader and data
shader_source = """
@group(0) @binding(0)
var<storage,read> data1: array<i32>;
@group(0) @binding(1)
var<storage,read_write> data2: array<i32>;
@compute
@workgroup_size(1)
fn main(@builtin(global_invocation_id) index: vec3<u32>) {
let i: u32 = index.x;
data2[i] = data1[i];
}
"""
# Build the input: n little int32 values 0..n-1, exposed as a memoryview so
# the raw bytes can be handed to the GPU without an extra copy.
n = 20
backing = bytearray(n * 4)  # 4 bytes per int32 element
data = memoryview(backing).cast("i")
position = 0
while position < n:
    data[position] = position
    position += 1
# %% The short version, using memoryview
# compute_with_buffers is a convenience wrapper:
#   - first arg maps input binding index -> input data,
#   - second arg maps output binding index -> (element_count, format) spec,
#   - n is the number of workgroups to dispatch.
out = compute_with_buffers({0: data}, {1: (n, "i")}, shader_source, n=n)

# The result is a dict keyed by output binding index; each value is a
# memoryview matching the requested output type.
result = out[1].tolist()
print(result)
# Compare against n rather than a hard-coded 20, so this check stays correct
# if the input size defined above is ever changed.
assert result == list(range(n))

# %% The short version, using numpy
# import numpy as np
#
# numpy_data = np.frombuffer(data, np.int32)
# out = compute_with_buffers({0: numpy_data}, {1: numpy_data.nbytes}, shader_source, n=n)
# result = np.frombuffer(out[1], dtype=np.int32)
# print(result.tolist())
# %% The long version using the wgpu API

# %% Create device

# Request a default device (picks an adapter/GPU automatically).
device = wgpu.utils.get_default_device()

# Show all available adapters, purely informational.
adapters = wgpu.gpu.enumerate_adapters_sync()
for a in adapters:
    print(a.summary)

# You can select specific GPU from the available adapters
# (e.g. to force a discrete NVIDIA GPU) by requesting a device
# from the matching adapter instead:
# adapter = None
# for a in adapters:
#     if "NVIDIA" in a.summary:
#         adapter = a
#         break
# assert adapter is not None
# device = adapter.request_device_sync()
# %%

# Compile the WGSL source into a shader module.
cshader = device.create_shader_module(code=shader_source)

# Create buffer objects. The input buffer is created with its data uploaded
# at creation time; the output buffer needs COPY_SRC so its contents can be
# read back to the CPU afterwards.
buffer1 = device.create_buffer_with_data(data=data, usage=wgpu.BufferUsage.STORAGE)
buffer2 = device.create_buffer(
    size=data.nbytes, usage=wgpu.BufferUsage.STORAGE | wgpu.BufferUsage.COPY_SRC
)
# Describe the shader's two bindings and which buffers back them:
# slot 0 is the read-only input, slot 1 the writable output.
_slots = (
    (0, wgpu.BufferBindingType.read_only_storage, buffer1),
    (1, wgpu.BufferBindingType.storage, buffer2),
)

# Layout entries: what kind of resource each binding expects.
binding_layouts = [
    {
        "binding": slot,
        "visibility": wgpu.ShaderStage.COMPUTE,
        "buffer": {
            "type": binding_type,
        },
    }
    for slot, binding_type, _ in _slots
]

# Binding entries: the concrete buffer attached to each slot.
bindings = [
    {
        "binding": slot,
        "resource": {"buffer": buf, "offset": 0, "size": buf.size},
    }
    for slot, _, buf in _slots
]
# Assemble the pipeline objects: the bind-group layout describes the shader's
# resource interface, and the bind group attaches the concrete buffers to it.
bg_layout = device.create_bind_group_layout(entries=binding_layouts)
layout = device.create_pipeline_layout(bind_group_layouts=[bg_layout])
group = device.create_bind_group(layout=bg_layout, entries=bindings)

# Build a compute pipeline around the shader's "main" entry point.
pipeline = device.create_compute_pipeline(
    layout=layout,
    compute={"module": cshader, "entry_point": "main"},
)

# Record one compute pass that dispatches n workgroups along x (one per
# element, since the shader's workgroup size is 1) and submit it.
encoder = device.create_command_encoder()
compute_pass = encoder.begin_compute_pass()
compute_pass.set_pipeline(pipeline)
compute_pass.set_bind_group(0, group)
compute_pass.dispatch_workgroups(n, 1, 1)  # workgroup counts along x, y, z
compute_pass.end()
device.queue.submit([encoder.finish()])
# Read result

# Read the output buffer back to the CPU. read_buffer returns a memoryview
# of the raw bytes; cast to "i" to reinterpret them as int32 values.
out = device.queue.read_buffer(buffer2).cast("i")
result = out.tolist()
print(result)
# Compare against n rather than a hard-coded 20, so this check stays correct
# if the input size defined at the top of the script is ever changed.
assert result == list(range(n))