File tree: 1 file changed, +10 -4 lines changed
Original file line number | Diff line number | Diff line change
@@ -99,11 +99,17 @@ function launch_configuration(backend::AbstractGPUBackend, heuristic;
99
99
100
100
if elements_per_thread > 1 && blocks > heuristic. blocks
101
101
# we want to launch more blocks than required, so prefer a grid-stride loop instead
102
- nelem = clamp (fld (blocks, heuristic. blocks), 1 , elements_per_thread)
103
- blocks = cld (blocks, nelem)
104
- (threads= threads, blocks= blocks, elements_per_thread= nelem)
102
+ # # try to stick to the number of blocks that the heuristic suggested
103
+ blocks = heuristic. blocks
104
+ nelem = cld (elements, blocks* threads)
105
+ # # only bump the number of blocks if we really need to
106
+ if nelem > elements_per_thread
107
+ nelem = elements_per_thread
108
+ blocks = cld (elements, nelem* threads)
109
+ end
110
+ (; threads, blocks, elements_per_thread= nelem)
105
111
else
106
- (threads= threads, blocks = blocks, elements_per_thread= 1 )
112
+ (; threads, blocks, elements_per_thread= 1 )
107
113
end
108
114
end
109
115
You can’t perform that action at this time.
0 commit comments