Skip to content

Commit 05697f6

Browse files
committed
metal : simplify f16 and f32 dequant kernels (#0)
1 parent f8e5813 commit 05697f6

File tree

1 file changed

+2
-8
lines changed

1 file changed

+2
-8
lines changed

ggml/src/ggml-metal.metal

+2 -8
Original file line number | Diff line number | Diff line change
@@ -19,18 +19,12 @@ constexpr constant static float kvalues_iq4nl_f[16] = {
1919
// NOTE: this is not dequantizing - we are simply fitting the template
2020
template <typename type4x4>
2121
void dequantize_f32(device const float4x4 * src, short il, thread type4x4 & reg) {
22-
float4x4 temp = *(((device float4x4 *)src));
23-
for (int i = 0; i < 16; i++){
24-
reg[i/4][i%4] = temp[i/4][i%4];
25-
}
22+
reg = (type4x4)(*src);
2623
}
2724

2825
template <typename type4x4>
2926
void dequantize_f16(device const half4x4 * src, short il, thread type4x4 & reg) {
30-
half4x4 temp = *(((device half4x4 *)src));
31-
for (int i = 0; i < 16; i++){
32-
reg[i/4][i%4] = temp[i/4][i%4];
33-
}
27+
reg = (type4x4)(*src);
3428
}
3529

3630
template <typename type4x4>

0 commit comments

Comments
 (0)