Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Support SSE3, SSSE3, SSE4.1 and SSE4.2. #1

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions sbcl-core.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,57 @@ May emit additional instructions using the temporary register."
(inst ,insn ,@tags tmp (ensure-reg-or-mem y) ,@imm)
(ensure-move ,rtype r tmp))))))))

#|---------------------------------|
| TERNARY REG INTRINSICS |
|---------------------------------|#

(define-vop (sse-ternary-base-op)
(:args (x :scs #.+any-sse-reg/immediate+ :target r)
(y :scs #.+any-sse-reg/immediate+)
(z :scs #.+any-sse-reg/immediate+))
(:results (r :scs (int-sse-reg)))
(:arg-types simd-pack simd-pack simd-pack)
(:policy :fast-safe)
(:note "inline SSE ternary operation")
(:vop-var vop)
(:save-p :compute-only))

(define-vop (sse-ternary-op sse-ternary-base-op)
(:temporary (:sc sse-reg) tmp))

(defmacro def-ternary-intrinsic (&whole whole
name rtype insn cost c-name
&key implicit-reg tags x-type y-type
z-type)
(declare (ignore c-name x-type y-type z-type))
`(progn
(export ',name)
(save-intrinsic-spec ,name ,whole)
(defknown ,name (sse-pack sse-pack sse-pack) ,rtype
(foldable flushable dx-safe))
;;
(define-vop (,name sse-ternary-op)
(:translate ,name)
(:results (r :scs (,(type-name-to-reg rtype))))
(:result-types ,(type-name-to-primitive rtype))
,@(when implicit-reg
`((:temporary (:sc sse-reg :offset 0) xmm0)))
(:generator ,cost
,@(if implicit-reg
;; instruction requires z in xmm0.
`((unless (or (location= y r) (location= z r))
(setf tmp r))
(ensure-load ,rtype xmm0 z)
(ensure-load ,rtype tmp x)
(inst ,insn ,@tags tmp (ensure-reg-or-mem y) xmm0)
(ensure-move ,rtype r tmp))
`((unless (or (location= y r) (location= z r))
(setf tmp r))
(ensure-load ,rtype tmp x)
(inst ,insn ,@tags tmp (ensure-reg-or-mem y)
(ensure-reg-or-mem z))
(ensure-move ,rtype r tmp)))))))

#|---------------------------------|
| XMM/INTEGER BINARY INTRINSICS |
|---------------------------------|#
Expand Down
32 changes: 32 additions & 0 deletions sbcl-functions.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@
(declare (type sse-pack x y))
(truly-the ,rtype (%primitive ,name x y)))))
;;
(def-ternary-intrinsic (name rtype insn cost c-name &key &allow-other-keys)
(declare (ignore insn cost c-name))
`(defun ,name (x y z)
(declare (type sse-pack x y z))
(truly-the ,rtype (%primitive ,name x y z))))
;;
(def-sse-int-intrinsic (name itype rtype insn cost c-name &key immediate-arg &allow-other-keys)
(declare (ignore insn cost c-name))
(unless immediate-arg
Expand Down Expand Up @@ -476,3 +482,29 @@
(%simd-pack-low x)
(%shuffle-subints xval xval imm 16)))))

;; Align

(defun alignr-pi8 (x y imm)
(declare (type sse-pack x y))
(let ((xval (%sse-pack-to-int x))
(yval (%sse-pack-to-int y))
(shift (- (* 8 imm))))
(truly-the int-sse-pack
(%int-to-sse-pack int-sse-pack
(logand (ash (logior (ash xval 128) yval) shift)
#xffffffffffffffffffffffffffffffff)))))

;; Blend

(defun blend-pi16 (x y imm)
(declare (type sse-pack x y))
(let ((xval (%sse-pack-to-int x))
(yval (%sse-pack-to-int y))
(mask (loop with mask = 0 ; expand selection mask into words
for i to 7
when (= (ldb (byte 1 i) imm) 1)
do (setf mask (logior mask (ash #xffff (* 16 i))))
finally (return mask))))
(truly-the int-sse-pack
(%int-to-sse-pack int-sse-pack
(logxor yval (logand (logxor yval xval) mask))))))
105 changes: 100 additions & 5 deletions sse-intrinsics.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,22 @@
(def-binary-intrinsic min-ps float-sse-pack minps 3 "_mm_min_ps":commutative t)
(def-binary-intrinsic max-ss float-sse-pack maxss 3 "_mm_max_ss")
(def-binary-intrinsic max-ps float-sse-pack maxps 3 "_mm_max_ps" :commutative t)
(def-binary-intrinsic addsub-ps float-sse-pack addsubps 3 "_mm_addsub_ps")
(def-binary-intrinsic hadd-ps float-sse-pack haddps 5 "_mm_hadd_ps")
(def-binary-intrinsic hsub-ps float-sse-pack hsubps 5 "_mm_hsub_ps")
(def-binary-intrinsic dp-ps float-sse-pack dpps 11 "_mm_dp_ps"
:commutative t :immediate-arg (unsigned-byte 8))

(def-unary-intrinsic sqrt-ss float-sse-pack sqrtss 20 "_mm_sqrt_ss" :partial t)
(def-unary-intrinsic sqrt-ps float-sse-pack sqrtps 20 "_mm_sqrt_ps")
(def-unary-intrinsic rsqrt-ss float-sse-pack rsqrtss 20 "_mm_rsqrt_ss" :partial t)
(def-unary-intrinsic rsqrt-ps float-sse-pack rsqrtps 20 "_mm_rsqrt_ps")
(def-unary-intrinsic rcp-ss float-sse-pack rcpss 13 "_mm_rcp_ss" :partial t)
(def-unary-intrinsic rcp-ps float-sse-pack rcpps 13 "_mm_rcp_ps")
(def-unary-intrinsic round-ps float-sse-pack roundps 3 "_mm_round_ps"
:immediate-arg (unsigned-byte 4))
(def-unary-intrinsic round-ss float-sse-pack roundss 3 "_mm_round_ss" :partial t
:immediate-arg (unsigned-byte 4))

;; Bitwise logic

Expand Down Expand Up @@ -227,8 +236,16 @@
(def-binary-intrinsic movehl-ps float-sse-pack movhlps 1 "_mm_movehl_ps")
(def-binary-intrinsic movelh-ps float-sse-pack movlhps 1 "_mm_movelh_ps")

(def-unary-intrinsic movehdup-ps float-sse-pack movshdup 1 "_mm_movehdup_ps")
(def-unary-intrinsic moveldup-ps float-sse-pack movsldup 1 "_mm_moveldup_ps")

(def-unary-intrinsic movemask-ps (unsigned-byte 4) movmskps 1 "_mm_movemask_ps" :arg-type float-sse-pack)

(def-binary-intrinsic blend-ps float-sse-pack blendps 1 "_mm_blend_ps"
:immediate-arg (unsigned-byte 8))
(def-ternary-intrinsic blendv-ps float-sse-pack blendvps 2 "_mm_blendv_ps"
:implicit-reg t)

;; Shuffle

(def-binary-intrinsic shuffle-ps float-sse-pack shufps 1 "_mm_shuffle_ps" :immediate-arg (unsigned-byte 8))
Expand Down Expand Up @@ -306,9 +323,18 @@
(def-binary-intrinsic min-pd double-sse-pack minpd 3 "_mm_min_pd" :commutative t)
(def-binary-intrinsic max-sd double-sse-pack maxsd 3 "_mm_max_sd")
(def-binary-intrinsic max-pd double-sse-pack maxpd 3 "_mm_max_pd" :commutative t)
(def-binary-intrinsic addsub-pd double-sse-pack addsubpd 3 "_mm_addsub_pd")
(def-binary-intrinsic hadd-pd double-sse-pack haddpd 5 "_mm_hadd_pd")
(def-binary-intrinsic hsub-pd double-sse-pack hsubpd 5 "_mm_hsub_pd")
(def-binary-intrinsic dp-pd double-sse-pack dppd 9 "_mm_dp_pd"
:commutative t :immediate-arg (unsigned-byte 8))

(def-binary-intrinsic sqrt-sd double-sse-pack sqrtsd 20 "_mm_sqrt_sd")
(def-unary-intrinsic sqrt-sd double-sse-pack sqrtsd 20 "_mm_sqrt_sd" :partial t)
(def-unary-intrinsic sqrt-pd double-sse-pack sqrtpd 20 "_mm_sqrt_pd")
(def-unary-intrinsic round-pd double-sse-pack roundpd 3 "_mm_round_pd"
:immediate-arg (unsigned-byte 4))
(def-unary-intrinsic round-sd double-sse-pack roundsd 3 "_mm_round_sd" :partial t
:immediate-arg (unsigned-byte 4))

;; Bitwise logic

Expand Down Expand Up @@ -378,6 +404,12 @@
(def-binary-intrinsic move-sd double-sse-pack movsd 1 "_mm_move_sd")

(def-unary-intrinsic movemask-pd (unsigned-byte 2) movmskpd 1 "_mm_movemask_pd" :arg-type double-sse-pack)
(def-unary-intrinsic movedup-pd double-sse-pack movddup 1 "_mm_movedup_pd")

(def-binary-intrinsic blend-pd double-sse-pack blendpd 1 "_mm_blend_pd"
:immediate-arg (unsigned-byte 8))
(def-ternary-intrinsic blendv-pd double-sse-pack blendvpd 2 "_mm_blendv_pd"
:implicit-reg t)

;; Shuffle

Expand Down Expand Up @@ -467,6 +499,8 @@

(def-load-intrinsic mem-ref-si64 int-sse-pack movd "_mm_loadl_epi64")

(def-load-intrinsic stream-load-pi int-sse-pack movntdqa "_mm_stream_load_si128")

(def-store-intrinsic mem-set-pi int-sse-pack movdqu "_mm_storeu_si128" :setf-name mem-ref-pi)
(def-store-intrinsic mem-set-api int-sse-pack movdqa "_mm_store_si128" :setf-name mem-ref-api)

Expand Down Expand Up @@ -536,18 +570,32 @@
(def-binary-intrinsic avg-pu8 int-sse-pack pavgb 1 "_mm_avg_epu8" :commutative t)
(def-binary-intrinsic avg-pu16 int-sse-pack pavgw 1 "_mm_avg_epu16" :commutative t)

(def-binary-intrinsic madd-pi16 int-sse-pack pmaddwd 1 "_mm_madd_epi16" :commutative t)
(def-binary-intrinsic madd-pi16 int-sse-pack pmaddwd 1 "_mm_madd_epi16" :commutative t)
(def-binary-intrinsic maddubs-pi16 int-sse-pack pmaddubsw 1 "_mm_maddubs_epi16")

(def-binary-intrinsic max-pu8 int-sse-pack pmaxub 1 "_mm_max_epu8" :commutative t)
(def-binary-intrinsic max-pi8 int-sse-pack pmaxsb 1 "_mm_max_epi8" :commutative t)
(def-binary-intrinsic max-pu16 int-sse-pack pmaxuw 1 "_mm_max_epu16" :commutative t)
(def-binary-intrinsic max-pi16 int-sse-pack pmaxsw 1 "_mm_max_epi16" :commutative t)
(def-binary-intrinsic max-pu32 int-sse-pack pmaxud 1 "_mm_max_epu32" :commutative t)
(def-binary-intrinsic max-pi32 int-sse-pack pmaxsd 1 "_mm_max_epi32" :commutative t)
(def-binary-intrinsic min-pu8 int-sse-pack pminub 1 "_mm_min_epu8" :commutative t)
(def-binary-intrinsic min-pi8 int-sse-pack pminsb 1 "_mm_min_epi8" :commutative t)
(def-binary-intrinsic min-pu16 int-sse-pack pminuw 1 "_mm_min_epu16" :commutative t)
(def-binary-intrinsic min-pi16 int-sse-pack pminsw 1 "_mm_min_epi16" :commutative t)
(def-binary-intrinsic min-pu32 int-sse-pack pminud 1 "_mm_min_epu32" :commutative t)
(def-binary-intrinsic min-pi32 int-sse-pack pminsd 1 "_mm_min_epi32" :commutative t)

(def-binary-intrinsic mulhi-pi16 int-sse-pack pmulhw 3 "_mm_mulhi_epi16" :commutative t)
(def-binary-intrinsic mulhi-pu16 int-sse-pack pmulhuw 3 "_mm_mulhi_epu16" :commutative t)
(def-binary-intrinsic mullo-pi16 int-sse-pack pmullw 3 "_mm_mullo_epi16" :commutative t)
(def-unary-intrinsic minpos-pu16 int-sse-pack phminposuw 3 "_mm_minpos_epu16")

(def-binary-intrinsic mulhi-pi16 int-sse-pack pmulhw 3 "_mm_mulhi_epi16" :commutative t)
(def-binary-intrinsic mulhi-pu16 int-sse-pack pmulhuw 3 "_mm_mulhi_epu16" :commutative t)
(def-binary-intrinsic mullo-pi16 int-sse-pack pmullw 3 "_mm_mullo_epi16" :commutative t)
(def-binary-intrinsic mullo-pi32 int-sse-pack pmulld 3 "_mm_mullo_epi32" :commutative t)
(def-binary-intrinsic mulhrs-pi16 int-sse-pack pmulhrsw 3 "_mm_mulhrs_epi16" :commutative t)

(def-binary-intrinsic mul-pu32 int-sse-pack pmuludq 3 "_mm_mul_epu32" :commutative t)
(def-binary-intrinsic mul-pi32 int-sse-pack pmuldq 3 "_mm_mul_epi32" :commutative t)

(def-binary-intrinsic sad-pu8 int-sse-pack psadbw 1 "_mm_sad_epu8" :commutative t)

Expand All @@ -561,6 +609,24 @@
(def-binary-intrinsic subs-pu8 int-sse-pack psubusb 1 "_mm_subs_epu8")
(def-binary-intrinsic subs-pu16 int-sse-pack psubusw 1 "_mm_subs_epu16")

(def-binary-intrinsic hadd-pi16 int-sse-pack phaddw 1 "_mm_hadd_epi16")
(def-binary-intrinsic hadd-pi32 int-sse-pack phaddd 1 "_mm_hadd_epi32")
(def-binary-intrinsic hsub-pi16 int-sse-pack phsubw 1 "_mm_hsub_epi16")
(def-binary-intrinsic hsub-pi32 int-sse-pack phsubd 1 "_mm_hsub_epi32")
(def-binary-intrinsic hadds-pi16 int-sse-pack phaddsw 1 "_mm_hadds_epi16")
(def-binary-intrinsic hsubs-pi16 int-sse-pack phsubsw 1 "_mm_hsubs_epi16")

(def-binary-intrinsic sign-pi8 int-sse-pack psignb 1 "_mm_sign_epi8")
(def-binary-intrinsic sign-pi16 int-sse-pack psignw 1 "_mm_sign_epi16")
(def-binary-intrinsic sign-pi32 int-sse-pack psignd 1 "_mm_sign_epi32")

(def-unary-intrinsic abs-pi8 int-sse-pack pabsb 1 "_mm_abs_epi8")
(def-unary-intrinsic abs-pi16 int-sse-pack pabsw 1 "_mm_abs_epi16")
(def-unary-intrinsic abs-pi32 int-sse-pack pabsd 1 "_mm_abs_epi32")

(def-binary-intrinsic mpsad-pu8 int-sse-pack mpsadbw 4 "_mm_mpsadbw_epu8"
:immediate-arg (unsigned-byte 3))

;; Bitwise logic

#+sbcl
Expand Down Expand Up @@ -638,6 +704,7 @@
(def-binary-intrinsic =-pi8 int-sse-pack pcmpeqb 1 "_mm_cmpeq_epi8")
(def-binary-intrinsic =-pi16 int-sse-pack pcmpeqw 1 "_mm_cmpeq_epi16")
(def-binary-intrinsic =-pi32 int-sse-pack pcmpeqd 1 "_mm_cmpeq_epi32")
(def-binary-intrinsic =-pi64 int-sse-pack pcmpeqq 1 "_mm_cmpeq_epi64")

#+ecl
(def-binary-intrinsic <-pi8 int-sse-pack nil nil "_mm_cmplt_epi8")
Expand All @@ -649,17 +716,23 @@
(def-binary-intrinsic >-pi8 int-sse-pack pcmpgtb 1 "_mm_cmpgt_epi8")
(def-binary-intrinsic >-pi16 int-sse-pack pcmpgtw 1 "_mm_cmpgt_epi16")
(def-binary-intrinsic >-pi32 int-sse-pack pcmpgtd 1 "_mm_cmpgt_epi32")
(def-binary-intrinsic >-pi64 int-sse-pack pcmpgtq 1 "_mm_cmpgt_epi64")

;; Misc

(def-binary-intrinsic packs-pi16 int-sse-pack packsswb 1 "_mm_packs_epi16")
(def-binary-intrinsic packs-pi32 int-sse-pack packssdw 1 "_mm_packs_epi32")
(def-binary-intrinsic packus-pi16 int-sse-pack packuswb 1 "_mm_packus_epi16")
(def-binary-intrinsic packus-pi32 int-sse-pack packusdw 1 "_mm_packus_epi32")

(def-unary-intrinsic extract-pi16 (unsigned-byte 16) pextrw 1 "_mm_extract_epi16"
:immediate-arg (unsigned-byte 8) :arg-type int-sse-pack)
(def-sse-int-intrinsic insert-pi8 fixnum int-sse-pack pinsrb 1 "_mm_insert_epi8"
:immediate-arg (unsigned-byte 8))
(def-sse-int-intrinsic insert-pi16 fixnum int-sse-pack pinsrw 1 "_mm_insert_epi16"
:immediate-arg (unsigned-byte 8))
(def-sse-int-intrinsic insert-pi32 fixnum int-sse-pack pinsrd 1 "_mm_insert_epi32"
:immediate-arg (unsigned-byte 8))

(def-unary-intrinsic movemask-pi8 (unsigned-byte 16) pmovmskb 1 "_mm_movemask_epi8" :arg-type int-sse-pack)

Expand All @@ -673,11 +746,20 @@
(def-binary-intrinsic unpacklo-pi32 int-sse-pack punpckldq 1 "_mm_unpacklo_epi32")
(def-binary-intrinsic unpacklo-pi64 int-sse-pack punpcklqdq 1 "_mm_unpacklo_epi64")

(def-binary-intrinsic alignr-pi8 int-sse-pack palignr 1 "_mm_alignr_epi8"
:immediate-arg (unsigned-byte 8))

(def-unary-intrinsic move-pi64 int-sse-pack movq 1 "_mm_move_epi64")

(def-ternary-intrinsic blendv-pi8 int-sse-pack pblendvb 1 "_mm_blendv_epi8"
:implicit-reg t)
(def-binary-intrinsic blend-pi16 int-sse-pack pblendw 1 "_mm_blend_epi16"
:immediate-arg (unsigned-byte 8))

;; Shuffle

(def-unary-intrinsic shuffle-pi32 int-sse-pack pshufd 1 "_mm_shuffle_epi32" :immediate-arg (unsigned-byte 8))
(def-binary-intrinsic shuffle-pi8 int-sse-pack pshufb 1 "_mm_shuffle_epi8")
(def-unary-intrinsic shufflelo-pi16 int-sse-pack pshuflw 1 "_mm_shufflelo_epi16" :immediate-arg (unsigned-byte 8))
(def-unary-intrinsic shufflehi-pi16 int-sse-pack pshufhw 1 "_mm_shufflehi_epi16" :immediate-arg (unsigned-byte 8))

Expand Down Expand Up @@ -729,3 +811,16 @@
(def-unary-intrinsic convert-pi-to-su64 (unsigned-byte 64) movd 1
#-msvc "_mm_cvtsi128_si64" #+msvc "_mm_cvtsi128_si64x" :arg-type int-sse-pack)

(def-unary-intrinsic convert-pi8-to-pi16 int-sse-pack pmovsxbw 1 "_mm_cvtepi8_epi16")
(def-unary-intrinsic convert-pi8-to-pi32 int-sse-pack pmovsxbd 1 "_mm_cvtepi8_epi32")
(def-unary-intrinsic convert-pi8-to-pi64 int-sse-pack pmovsxbq 1 "_mm_cvtepi8_epi64")
(def-unary-intrinsic convert-pi16-to-pi32 int-sse-pack pmovsxwd 1 "_mm_cvtepi16_epi32")
(def-unary-intrinsic convert-pi16-to-pi64 int-sse-pack pmovsxwq 1 "_mm_cvtepi16_epi64")
(def-unary-intrinsic convert-pi32-to-pi64 int-sse-pack pmovsxdq 1 "_mm_cvtepi32_epi64")

(def-unary-intrinsic convert-pu8-to-pi16 int-sse-pack pmovzxbw 1 "_mm_cvtepu8_epi16")
(def-unary-intrinsic convert-pu8-to-pi32 int-sse-pack pmovzxbd 1 "_mm_cvtepu8_epi32")
(def-unary-intrinsic convert-pu8-to-pi64 int-sse-pack pmovzxbq 1 "_mm_cvtepu8_epi64")
(def-unary-intrinsic convert-pu16-to-pi32 int-sse-pack pmovzxwd 1 "_mm_cvtepu16_epi32")
(def-unary-intrinsic convert-pu16-to-pi64 int-sse-pack pmovzxwq 1 "_mm_cvtepu16_epi64")
(def-unary-intrinsic convert-pu32-to-pi64 int-sse-pack pmovzxdq 1 "_mm_cvtepu32_epi64")