diff --git a/lib/Target/X86/X86InstrFoldTables.cpp b/lib/Target/X86/X86InstrFoldTables.cpp index 11fbb37b118..e16382e956c 100644 --- a/lib/Target/X86/X86InstrFoldTables.cpp +++ b/lib/Target/X86/X86InstrFoldTables.cpp @@ -5521,6 +5521,12 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = { { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS }, { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS }, { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS }, + { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D }, + { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D }, + { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D }, + { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q }, + { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q }, + { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q }, }; static const X86MemoryFoldTableEntry * diff --git a/test/CodeGen/X86/avx512-broadcast-unfold.ll b/test/CodeGen/X86/avx512-broadcast-unfold.ll index 6448b3c6ef4..c3d5f23d0fd 100644 --- a/test/CodeGen/X86/avx512-broadcast-unfold.ll +++ b/test/CodeGen/X86/avx512-broadcast-unfold.ll @@ -4641,15 +4641,16 @@ define void @bcast_unfold_vpternlog_v16i32(i32* %arg, i32* %arg1) { ; CHECK-LABEL: bcast_unfold_vpternlog_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB131_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0 -; CHECK-NEXT: vmovdqu64 4096(%rsi,%rax), %zmm1 -; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 -; CHECK-NEXT: vpternlogd $216, {{.*}}(%rip){1to16}, %zmm0, %zmm1 -; CHECK-NEXT: vpmulld %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax) +; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm1 +; CHECK-NEXT: vmovdqu64 4096(%rsi,%rax), %zmm2 +; CHECK-NEXT: vpmulld %zmm2, %zmm1, %zmm3 +; CHECK-NEXT: vpternlogd $216, %zmm0, %zmm1, %zmm2 +; CHECK-NEXT: vpmulld %zmm3, %zmm2, %zmm1 +; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB131_1 ; CHECK-NEXT: # %bb.2: # %bb20