[AVX-512] Add more patterns to fold masked VPTERNLOG with load when the passthru isn't operand 0.
llvm-svn: 295640
parent 0e3c8588fe
commit a7078c2af1
@@ -8916,6 +8916,32 @@ def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
   if (Imm & 0x40) NewImm |= 0x20;
   return getI8Imm(NewImm, SDLoc(N));
 }]>;
+def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
+  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
+  uint8_t Imm = N->getZExtValue();
+  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
+  uint8_t NewImm = Imm & 0x81;
+  if (Imm & 0x02) NewImm |= 0x04;
+  if (Imm & 0x04) NewImm |= 0x10;
+  if (Imm & 0x08) NewImm |= 0x40;
+  if (Imm & 0x10) NewImm |= 0x02;
+  if (Imm & 0x20) NewImm |= 0x08;
+  if (Imm & 0x40) NewImm |= 0x20;
+  return getI8Imm(NewImm, SDLoc(N));
+}]>;
+def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
+  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
+  uint8_t Imm = N->getZExtValue();
+  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
+  uint8_t NewImm = Imm & 0x81;
+  if (Imm & 0x02) NewImm |= 0x10;
+  if (Imm & 0x04) NewImm |= 0x02;
+  if (Imm & 0x08) NewImm |= 0x20;
+  if (Imm & 0x10) NewImm |= 0x04;
+  if (Imm & 0x20) NewImm |= 0x40;
+  if (Imm & 0x40) NewImm |= 0x08;
+  return getI8Imm(NewImm, SDLoc(N));
+}]>;
 
 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _>{
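
The two SDNodeXForms added above are plain bit permutations of the truth-table immediate: reordering the instruction's operands permutes the bits of the 3-bit lookup index, so the immediate's bits have to be permuted to match. Below is a standalone C++ sketch, not part of the patch (Remap231, Remap312 and ternlog are names invented here), that mirrors the two transforms and exhaustively checks the identities they encode, assuming the usual VPTERNLOG convention that the first source operand supplies bit 2 of the index into the immediate and the third source supplies bit 0.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Per-bit VPTERNLOG: look up imm8 at index (A<<2)|(B<<1)|C.
static int ternlog(uint8_t Imm, int A, int B, int C) {
  return (Imm >> ((A << 2) | (B << 1) | C)) & 1;
}

// Same bit moves as VPTERNLOG231_imm8 above.
static uint8_t Remap231(uint8_t Imm) {
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return NewImm;
}

// Same bit moves as VPTERNLOG312_imm8 above.
static uint8_t Remap312(uint8_t Imm) {
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return NewImm;
}

int main() {
  // Exhaustive check of the identities the two permutations encode:
  //   ternlog(Remap231(I), B, C, A) == ternlog(I, A, B, C)
  //   ternlog(Remap312(I), C, A, B) == ternlog(I, A, B, C)
  for (int I = 0; I < 256; ++I)
    for (int Bits = 0; Bits < 8; ++Bits) {
      int A = (Bits >> 2) & 1, B = (Bits >> 1) & 1, C = Bits & 1;
      assert(ternlog(Remap231(I), B, C, A) == ternlog(I, A, B, C));
      assert(ternlog(Remap312(I), C, A, B) == ternlog(I, A, B, C));
    }
  std::puts("immediate permutations verified");
  return 0;
}

The two permutations are the two cyclic rotations of the index bits and are inverses of each other.
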
@@ -8992,6 +9018,30 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                    _.RC:$src1)),
             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
+                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+                   _.RC:$src1)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode _.RC:$src2, _.RC:$src1,
+                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
+                   _.RC:$src1)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
+                    _.RC:$src1, (i8 imm:$src4)),
+                   _.RC:$src1)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
+                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
+                   _.RC:$src1)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
 
   // Additional patterns for matching broadcasts in other positions.
   def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
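
All four new patterns have the same shape as the existing masked-load pattern above them: the vselect's false value is $src1, which is also the operand tied to the destination of the masked memory form (rmik), and the immediate is rewritten so that the remaining operands plus the load line up with the instruction's operand order, with the load in the third (memory) position. That is the point of the commit title: merge masking always passes through the tied first operand, so when the IR-level select merges with what was originally operand 1 or 2, the operands have to be commuted and the immediate remapped before the load can be folded. Below is a rough scalar model of that contract for the 32-bit element case, with invented names and assuming merge-masking semantics (lanes whose mask bit is clear keep the old destination value).

#include <cassert>
#include <cstdint>

// Per-bit ternary logic: the first operand supplies bit 2 of the index into
// the 8-bit truth table, the third operand supplies bit 0.
static uint32_t ternlogd(uint8_t Imm, uint32_t A, uint32_t B, uint32_t C) {
  uint32_t R = 0;
  for (int Bit = 0; Bit < 32; ++Bit) {
    int Idx = (((A >> Bit) & 1) << 2) | (((B >> Bit) & 1) << 1) | ((C >> Bit) & 1);
    R |= uint32_t((Imm >> Idx) & 1) << Bit;
  }
  return R;
}

// Rough model of the masked, load-folding form the patterns select
// (VPTERNLOGD zmm {k}, zmm, m512, imm8): Dst is both the tied first source
// and the passthru for lanes whose mask bit is clear.
static void vpternlogd_rmik(uint32_t Dst[16], uint16_t Mask,
                            const uint32_t Src2[16], const uint32_t Mem[16],
                            uint8_t Imm) {
  for (int I = 0; I < 16; ++I)
    if ((Mask >> I) & 1)
      Dst[I] = ternlogd(Imm, Dst[I], Src2[I], Mem[I]);
  // Disabled lanes keep Dst unchanged, which is why the vselect's false
  // value in the patterns must be $src1.
}

int main() {
  uint32_t Dst[16], Src2[16], Mem[16];
  for (int I = 0; I < 16; ++I) {
    Dst[I]  = 0xAAAAAAAAu;   // passthru and first source
    Src2[I] = 0xCCCCCCCCu;
    Mem[I]  = 0xF0F0F0F0u;
  }
  // 0x96 is the classic three-way XOR truth table.
  vpternlogd_rmik(Dst, /*Mask=*/0x00FF, Src2, Mem, /*Imm=*/0x96);
  for (int I = 0; I < 16; ++I)
    assert(Dst[I] == (I < 8 ? (0xAAAAAAAAu ^ 0xCCCCCCCCu ^ 0xF0F0F0F0u)
                            : 0xAAAAAAAAu));
  return 0;
}
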
@@ -228,9 +228,8 @@ define <16 x i32> @vpternlog_v16i32_012_load0_mask(<16 x i32>* %x0ptr, <16 x i32
 define <16 x i32> @vpternlog_v16i32_012_load0_mask1(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
 ; CHECK-LABEL: vpternlog_v16i32_012_load0_mask1:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
 ; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vpternlogd $9, %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1}
 ; CHECK-NEXT: retq
   %x0 = load <16 x i32>, <16 x i32>* %x0ptr
   %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
@@ -242,9 +241,8 @@ define <16 x i32> @vpternlog_v16i32_012_load0_mask1(<16 x i32>* %x0ptr, <16 x i3
 define <16 x i32> @vpternlog_v16i32_012_load0_mask2(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
 ; CHECK-LABEL: vpternlog_v16i32_012_load0_mask2:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
 ; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
 ; CHECK-NEXT: retq
   %x0 = load <16 x i32>, <16 x i32>* %x0ptr
@@ -268,9 +266,8 @@ define <16 x i32> @vpternlog_v16i32_012_load1_mask(<16 x i32> %x0, <16 x i32>* %
 define <16 x i32> @vpternlog_v16i32_012_load1_mask2(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
 ; CHECK-LABEL: vpternlog_v16i32_012_load1_mask2:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
 ; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vpternlogd $33, %zmm0, %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
 ; CHECK-NEXT: retq
   %x1 = load <16 x i32>, <16 x i32>* %x1ptr
@@ -294,9 +291,8 @@ define <16 x i32> @vpternlog_v16i32_012_load2_mask(<16 x i32> %x0, <16 x i32> %x
 define <16 x i32> @vpternlog_v16i32_012_load2_mask1(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
 ; CHECK-LABEL: vpternlog_v16i32_012_load2_mask1:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
 ; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vpternlogd $9, %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
 ; CHECK-NEXT: retq
   %x2 = load <16 x i32>, <16 x i32>* %x2ptr