Skip to content

Commit

Permalink
perf: accelerate alibi (#365)
Browse files Browse the repository at this point in the history
ALiBi experienced a performance degradation after #262 because of
an increased number of integer divisions.
This PR fixes the issue.
  • Loading branch information
yzh119 authored Jul 10, 2024
1 parent 1116237 commit 4f0a9f9
Showing 1 changed file with 16 additions and 8 deletions.
24 changes: 16 additions & 8 deletions include/flashinfer/fastdiv.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -65,27 +65,35 @@ struct uint_fastdiv {
// Implicit conversion to unsigned int: yields the stored member d.
__host__ __device__ __forceinline__ operator unsigned int() const {
  return d;
}

// Computes the quotient and remainder of n with respect to the stored divisor d.
//
//   n : dividend
//   q : output, quotient
//   r : output, remainder, computed as n - q * d
//
// The members m, a and s are declared outside this view; they are presumably
// the precomputed multiplier, add-indicator and shift for d -- confirm against
// the constructor.
__host__ __device__ __forceinline__ void divmod(uint32_t n, uint32_t& q, uint32_t& r) const {
  if (d == 1) {
    // Division by one: skip the multiply/shift sequence entirely.
    q = n;
  } else {
#ifdef __CUDA_ARCH__
    // Upper 32 bits of the 64-bit product m * n.
    q = __umulhi(m, n);
#else
    // Host counterpart of __umulhi. The product is formed in unsigned 64-bit
    // arithmetic so it cannot overflow a signed long long when both factors
    // are large.
    q = ((unsigned long long)m * (unsigned long long)n) >> 32;
#endif
    // The correction term and the shift must each be applied exactly once.
    q += a * n;
    q >>= s;
  }
  r = n - q * d;
}
};

// Divides n by the divisor wrapped in a uint_fastdiv and returns the quotient.
//
//   n       : dividend
//   divisor : wrapper whose members d, m, a and s are read here; their
//             declarations are outside this view (presumably the divisor and
//             its precomputed multiplier, add-indicator and shift -- confirm).
//
// Only the quotient is produced; no remainder is computed on this path.
__host__ __device__ __forceinline__ uint32_t operator/(const uint32_t n,
                                                       const uint_fastdiv& divisor) {
  uint32_t q;
  if (divisor.d == 1) {
    // Division by one: the quotient is the dividend itself.
    q = n;
  } else {
#ifdef __CUDA_ARCH__
    // Upper 32 bits of the 64-bit product divisor.m * n.
    q = __umulhi(divisor.m, n);
#else
    // Host counterpart of __umulhi. The product is formed in unsigned 64-bit
    // arithmetic so it cannot overflow a signed long long when both factors
    // are large.
    q = ((unsigned long long)divisor.m * (unsigned long long)n) >> 32;
#endif
    // The correction term and the shift must each be applied exactly once.
    q += divisor.a * n;
    q >>= divisor.s;
  }
  return q;
}

Expand Down

0 comments on commit 4f0a9f9

Please sign in to comment.