; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI1,BMI1-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI2,BMI2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST,BMI1,BMI1-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST,BMI2,BMI2-FAST

declare i64 @llvm.x86.bmi.bextr.64(i64, i64)
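; bextr64 uses the @llvm.x86.bmi.bextr.64 intrinsic directly, so every
; configuration should emit a single bextrq. (BEXTR's control operand packs
; the start bit index in bits 7:0 and the field length in bits 15:8.)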
define i64 @bextr64(i64 %x, i64 %y) {
; CHECK-LABEL: bextr64:
; CHECK:         # %bb.0:
; CHECK-NEXT:    bextrq %rsi, %rdi, %rax
; CHECK-NEXT:    retq
  %tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y)
  ret i64 %tmp
}
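; (x >> 4) & 0xfff extracts 12 bits starting at bit 4. With the default
; (slow-bextr) tuning the shift+and survives; with +fast-bextr it becomes a
; bextrl with control 0xc04 (length 12 in bits 15:8, start 4 in bits 7:0).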
define i64 @bextr64b(i64 %x) uwtable ssp {
; BEXTR-SLOW-LABEL: bextr64b:
; BEXTR-SLOW:         # %bb.0:
; BEXTR-SLOW-NEXT:    movq %rdi, %rax
; BEXTR-SLOW-NEXT:    shrl $4, %eax
; BEXTR-SLOW-NEXT:    andl $4095, %eax # imm = 0xFFF
; BEXTR-SLOW-NEXT:    retq
;
; BEXTR-FAST-LABEL: bextr64b:
; BEXTR-FAST:         # %bb.0:
; BEXTR-FAST-NEXT:    movl $3076, %eax # imm = 0xC04
; BEXTR-FAST-NEXT:    bextrl %eax, %edi, %eax
; BEXTR-FAST-NEXT:    retq
  %1 = lshr i64 %x, 4
  %2 = and i64 %1, 4095
  ret i64 %2
}
; Make sure we still use the AH subreg trick to extract 15:8
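; (x >> 8) & 0xff is exactly bits 15:8, so a movzbl from %ah is enough and no
; explicit shift or mask should be emitted.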
define i64 @bextr64_subreg(i64 %x) uwtable ssp {
; CHECK-LABEL: bextr64_subreg:
; CHECK:         # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    movzbl %ah, %eax
; CHECK-NEXT:    retq
  %1 = lshr i64 %x, 8
  %2 = and i64 %1, 255
  ret i64 %2
}
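; Same extract as bextr64b, but with the value loaded from memory; the
; fast-bextr form should fold the load straight into bextrl.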
define i64 @bextr64b_load(i64* %x) {
; BEXTR-SLOW-LABEL: bextr64b_load:
; BEXTR-SLOW:         # %bb.0:
; BEXTR-SLOW-NEXT:    movl (%rdi), %eax
; BEXTR-SLOW-NEXT:    shrl $4, %eax
; BEXTR-SLOW-NEXT:    andl $4095, %eax # imm = 0xFFF
; BEXTR-SLOW-NEXT:    retq
;
; BEXTR-FAST-LABEL: bextr64b_load:
; BEXTR-FAST:         # %bb.0:
; BEXTR-FAST-NEXT:    movl $3076, %eax # imm = 0xC04
; BEXTR-FAST-NEXT:    bextrl %eax, (%rdi), %eax
; BEXTR-FAST-NEXT:    retq
  %1 = load i64, i64* %x, align 8
  %2 = lshr i64 %1, 4
  %3 = and i64 %2, 4095
  ret i64 %3
}
; PR34042
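; The control value arrives as an i32 and is sign-extended to i64, but bextrq
; only reads the low 16 bits of its control operand, so no explicit movslq is
; expected; only the implicit-def "kill" annotation for the subregister remains.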
define i64 @bextr64c(i64 %x, i32 %y) {
; CHECK-LABEL: bextr64c:
; CHECK:         # %bb.0:
; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
; CHECK-NEXT:    bextrq %rsi, %rdi, %rax
; CHECK-NEXT:    retq
  %tmp0 = sext i32 %y to i64
  %tmp1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %tmp0)
  ret i64 %tmp1
}
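; The mask 8589934591 is 0x1ffffffff (33 one bits), too wide for a 32-bit AND
; immediate. Slow-bextr BMI1 shifts first and then extracts 33 bits (control
; 0x2100); slow-bextr BMI2 prefers bzhi of the low 35 bits followed by the
; shift; fast-bextr does it in one bextrq with control 0x2102 (length 33, start 2).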
define i64 @bextr64d(i64 %a) {
; BMI1-SLOW-LABEL: bextr64d:
; BMI1-SLOW:         # %bb.0: # %entry
; BMI1-SLOW-NEXT:    shrq $2, %rdi
; BMI1-SLOW-NEXT:    movl $8448, %eax # imm = 0x2100
; BMI1-SLOW-NEXT:    bextrq %rax, %rdi, %rax
; BMI1-SLOW-NEXT:    retq
;
; BMI2-SLOW-LABEL: bextr64d:
; BMI2-SLOW:         # %bb.0: # %entry
; BMI2-SLOW-NEXT:    movl $35, %eax
; BMI2-SLOW-NEXT:    bzhiq %rax, %rdi, %rax
; BMI2-SLOW-NEXT:    shrq $2, %rax
; BMI2-SLOW-NEXT:    retq
;
; BEXTR-FAST-LABEL: bextr64d:
; BEXTR-FAST:         # %bb.0: # %entry
; BEXTR-FAST-NEXT:    movl $8450, %eax # imm = 0x2102
; BEXTR-FAST-NEXT:    bextrq %rax, %rdi, %rax
; BEXTR-FAST-NEXT:    retq
entry:
  %shr = lshr i64 %a, 2
  %and = and i64 %shr, 8589934591
  ret i64 %and
}
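; Same 33-bit extract as bextr64d, but with the source in memory. The BMI2
; slow-bextr path can fold the load into bzhiq, and the fast-bextr path folds
; it into bextrq; only BMI1 slow-bextr needs a separate movq.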
define i64 @bextr64d_load(i64* %aptr) {
; BMI1-SLOW-LABEL: bextr64d_load:
; BMI1-SLOW:         # %bb.0: # %entry
; BMI1-SLOW-NEXT:    movq (%rdi), %rax
; BMI1-SLOW-NEXT:    shrq $2, %rax
; BMI1-SLOW-NEXT:    movl $8448, %ecx # imm = 0x2100
; BMI1-SLOW-NEXT:    bextrq %rcx, %rax, %rax
; BMI1-SLOW-NEXT:    retq
;
; BMI2-SLOW-LABEL: bextr64d_load:
; BMI2-SLOW:         # %bb.0: # %entry
; BMI2-SLOW-NEXT:    movl $35, %eax
; BMI2-SLOW-NEXT:    bzhiq %rax, (%rdi), %rax
; BMI2-SLOW-NEXT:    shrq $2, %rax
; BMI2-SLOW-NEXT:    retq
;
; BEXTR-FAST-LABEL: bextr64d_load:
; BEXTR-FAST:         # %bb.0: # %entry
; BEXTR-FAST-NEXT:    movl $8450, %eax # imm = 0x2102
; BEXTR-FAST-NEXT:    bextrq %rax, (%rdi), %rax
; BEXTR-FAST-NEXT:    retq
entry:
  %a = load i64, i64* %aptr, align 8
  %shr = lshr i64 %a, 2
  %and = and i64 %shr, 8589934591
  ret i64 %and
}
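; The mask 8589934590 (0x1fffffffe) has bit 0 clear, so (x >> 2) & mask is not
; a contiguous extract from bit 0 and should stay a plain shift plus a
; movabsq/andq of the 64-bit immediate.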
define i64 @non_bextr64(i64 %x) {
; CHECK-LABEL: non_bextr64:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    shrq $2, %rdi
; CHECK-NEXT:    movabsq $8589934590, %rax # imm = 0x1FFFFFFFE
; CHECK-NEXT:    andq %rdi, %rax
; CHECK-NEXT:    retq
entry:
  %shr = lshr i64 %x, 2
  %and = and i64 %shr, 8589934590
  ret i64 %and
}