We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 4366ff9 commit 1a03b70 Copy full SHA for 1a03b70
ggml-cuda.cu
@@ -1661,8 +1661,8 @@ template <int vdr> static __device__ __forceinline__ float vec_dot_q8_1_q8_1_imp
1661
#else
1662
const float2 dm8f = __half22float2(dm8);
1663
const float2 ds8f = __half22float2(ds8);
1664
- const float d8d8 = __low2float(dm8) * __low2float(ds8);
1665
- const float m8s8 = __high2float(dm8) * __high2float(ds8);
+ const float d8d8 = dm8f.x * ds8f.x;
+ const float m8s8 = dm8f.y * ds8f.y;
1666
#endif // GGML_CUDA_F16
1667
1668
// scale second part of sum by QI8_1/ vdr to compensate for multiple threads adding it
0 commit comments