mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[x86] fix remaining miscompile bug in horizontal binop matching (PR40243)
When we use the partial-matching function on a 128-bit chunk, we must account for the possibility that we've matched undef halves of the original source vectors, so the outputs may need to be reset.

This should allow closing PR40243:
https://bugs.llvm.org/show_bug.cgi?id=40243

llvm-svn: 350830
This commit is contained in:
parent
244c6fbbbb
commit
dd0362a420
@ -8370,7 +8370,6 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
|
||||
SDLoc DL(BV);
|
||||
SDValue InVec0, InVec1;
|
||||
if (VT == MVT::v8i32 || VT == MVT::v16i16) {
|
||||
// Try to match an AVX2 horizontal add/sub of signed integers.
|
||||
SDValue InVec2, InVec3;
|
||||
unsigned X86Opcode;
|
||||
bool CanFold = true;
|
||||
@ -8397,12 +8396,16 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
|
||||
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
|
||||
return SDValue();
|
||||
|
||||
// Convert this build_vector into a pair of horizontal binop followed by
|
||||
// a concat vector.
|
||||
// Convert this build_vector into a pair of horizontal binops followed by
|
||||
// a concat vector. We must adjust the outputs from the partial horizontal
|
||||
// matching calls above to account for undefined vector halves.
|
||||
SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
|
||||
SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
|
||||
assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
|
||||
bool isUndefLO = NumUndefsLO == Half;
|
||||
bool isUndefHI = NumUndefsHI == Half;
|
||||
return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
|
||||
isUndefLO, isUndefHI);
|
||||
return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
|
||||
isUndefHI);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -101,6 +101,10 @@ define <8 x i32> @PR40243_alt(<8 x i32> %a, <8 x i32> %b) {
|
||||
;
|
||||
; AVX1-LABEL: PR40243_alt:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vphaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: PR40243_alt:
|
||||
|
Loading…
Reference in New Issue
Block a user