diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index b3b8452067c..5fda485dc8e 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -8916,6 +8916,38 @@ def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
   if (Imm & 0x40) NewImm |= 0x20;
   return getI8Imm(NewImm, SDLoc(N));
 }]>;
+def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
+  // Convert a VPTERNLOG immediate written for operands in the order
+  // (src2, src3, src1) into one for the instruction order (src1, src2, src3).
+  // Immediate bit (a<<2)|(b<<1)|c is the result for operand bits a, b, c, so
+  // the bit indexed by (src2, src3, src1) moves to index (src1, src2, src3).
+  uint8_t Imm = N->getZExtValue();
+  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
+  uint8_t NewImm = Imm & 0x81;
+  if (Imm & 0x02) NewImm |= 0x10;
+  if (Imm & 0x04) NewImm |= 0x02;
+  if (Imm & 0x08) NewImm |= 0x20;
+  if (Imm & 0x10) NewImm |= 0x04;
+  if (Imm & 0x20) NewImm |= 0x40;
+  if (Imm & 0x40) NewImm |= 0x08;
+  return getI8Imm(NewImm, SDLoc(N));
+}]>;
+def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
+  // Convert a VPTERNLOG immediate written for operands in the order
+  // (src3, src1, src2) into one for the instruction order (src1, src2, src3).
+  // Immediate bit (a<<2)|(b<<1)|c is the result for operand bits a, b, c, so
+  // the bit indexed by (src3, src1, src2) moves to index (src1, src2, src3).
+  uint8_t Imm = N->getZExtValue();
+  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
+  uint8_t NewImm = Imm & 0x81;
+  if (Imm & 0x02) NewImm |= 0x04;
+  if (Imm & 0x04) NewImm |= 0x10;
+  if (Imm & 0x08) NewImm |= 0x40;
+  if (Imm & 0x10) NewImm |= 0x02;
+  if (Imm & 0x20) NewImm |= 0x08;
+  if (Imm & 0x40) NewImm |= 0x20;
+  return getI8Imm(NewImm, SDLoc(N));
+}]>;
 
 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _>{
@@ -8992,6 +9024,30 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                    _.RC:$src1)),
             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
+                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+                   _.RC:$src1)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode _.RC:$src2, _.RC:$src1,
+                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
+                   _.RC:$src1)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
+                    _.RC:$src1, (i8 imm:$src4)),
+                   _.RC:$src1)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
+                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
+                   _.RC:$src1)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
 
   // Additional patterns for matching broadcasts in other positions.
   def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
diff --git a/test/CodeGen/X86/avx512-vpternlog-commute.ll b/test/CodeGen/X86/avx512-vpternlog-commute.ll
index 684d2c9cb36..afb223a1793 100644
--- a/test/CodeGen/X86/avx512-vpternlog-commute.ll
+++ b/test/CodeGen/X86/avx512-vpternlog-commute.ll
@@ -228,9 +228,8 @@ define <16 x i32> @vpternlog_v16i32_012_load0_mask(<16 x i32>* %x0ptr, <16 x i32
 define <16 x i32> @vpternlog_v16i32_012_load0_mask1(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
 ; CHECK-LABEL: vpternlog_v16i32_012_load0_mask1:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm2
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpternlogd $9, %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT:    vpternlogd $9, (%rdi), %zmm1, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
   %x0 = load <16 x i32>, <16 x i32>* %x0ptr
   %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
@@ -242,9 +241,8 @@ define <16 x i32> @vpternlog_v16i32_012_load0_mask1(<16 x i32>* %x0ptr, <16 x i3
 define <16 x i32> @vpternlog_v16i32_012_load0_mask2(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
 ; CHECK-LABEL: vpternlog_v16i32_012_load0_mask2:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm2
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vpternlogd $33, (%rdi), %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %x0 = load <16 x i32>, <16 x i32>* %x0ptr
@@ -268,9 +266,8 @@ define <16 x i32> @vpternlog_v16i32_012_load1_mask(<16 x i32> %x0, <16 x i32>* %
 define <16 x i32> @vpternlog_v16i32_012_load1_mask2(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
 ; CHECK-LABEL: vpternlog_v16i32_012_load1_mask2:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm2
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpternlogd $33, %zmm0, %zmm2, %zmm1 {%k1}
+; CHECK-NEXT:    vpternlogd $65, (%rdi), %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %x1 = load <16 x i32>, <16 x i32>* %x1ptr
@@ -294,9 +291,8 @@ define <16 x i32> @vpternlog_v16i32_012_load2_mask(<16 x i32> %x0, <16 x i32> %x
 define <16 x i32> @vpternlog_v16i32_012_load2_mask1(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
 ; CHECK-LABEL: vpternlog_v16i32_012_load2_mask1:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm2
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpternlogd $9, %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vpternlogd $9, (%rdi), %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %x2 = load <16 x i32>, <16 x i32>* %x2ptr