mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[X86][SSE] combineScalarToVector - only reuse broadcasts for scalar_to_vector if the source operands scalar types match
We were hitting an issue when the scalar_to_vector source was being implicitly truncated (in this case from i8 to vXi1) but we were also using the i8 source in a broadcast to a vXi8 value. Fixes PR50374.
This commit is contained in:
parent
fc07b73b83
commit
7b5702378b
@ -50473,16 +50473,20 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
|
||||
|
||||
// See if we're broadcasting the scalar value, in which case just reuse that.
|
||||
// Ensure the same SDValue from the SDNode use is being used.
|
||||
for (SDNode *User : Src->uses())
|
||||
if (User->getOpcode() == X86ISD::VBROADCAST && Src == User->getOperand(0)) {
|
||||
unsigned SizeInBits = VT.getFixedSizeInBits();
|
||||
unsigned BroadcastSizeInBits = User->getValueSizeInBits(0).getFixedSize();
|
||||
if (BroadcastSizeInBits == SizeInBits)
|
||||
return SDValue(User, 0);
|
||||
if (BroadcastSizeInBits > SizeInBits)
|
||||
return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits);
|
||||
// TODO: Handle BroadcastSizeInBits < SizeInBits when we have test coverage.
|
||||
}
|
||||
if (VT.getScalarType() == Src.getValueType())
|
||||
for (SDNode *User : Src->uses())
|
||||
if (User->getOpcode() == X86ISD::VBROADCAST &&
|
||||
Src == User->getOperand(0)) {
|
||||
unsigned SizeInBits = VT.getFixedSizeInBits();
|
||||
unsigned BroadcastSizeInBits =
|
||||
User->getValueSizeInBits(0).getFixedSize();
|
||||
if (BroadcastSizeInBits == SizeInBits)
|
||||
return SDValue(User, 0);
|
||||
if (BroadcastSizeInBits > SizeInBits)
|
||||
return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits);
|
||||
// TODO: Handle BroadcastSizeInBits < SizeInBits when we have test
|
||||
// coverage.
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
31
test/CodeGen/X86/pr50374.ll
Normal file
31
test/CodeGen/X86/pr50374.ll
Normal file
@ -0,0 +1,31 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s
|
||||
|
||||
define void @PR50374() {
|
||||
; CHECK-LABEL: PR50374:
|
||||
; CHECK: # %bb.0: # %while.84.body.preheader
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_1: # %vector.body1999
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
while.84.body.preheader:
|
||||
%0 = load i8, i8* undef, align 1
|
||||
%1 = load i8, i8* undef, align 4
|
||||
%.not14.2.2 = icmp eq i8 0, 0
|
||||
%2 = and i8 %0, 1
|
||||
%.not14.1.2 = icmp eq i8 %2, 0
|
||||
%3 = select i1 %.not14.2.2, i1 %.not14.1.2, i1 false
|
||||
%.not14.2361 = icmp eq i8 0, 0
|
||||
%4 = select i1 %3, i1 %.not14.2361, i1 false
|
||||
%add.10961.i.2.2 = select reassoc nsz contract i1 %4, float 0.000000e+00, float 0x7FF8000000000000
|
||||
%broadcast.splatinsert2024 = insertelement <8 x float> poison, float %add.10961.i.2.2, i32 0
|
||||
%broadcast.splat2025 = shufflevector <8 x float> %broadcast.splatinsert2024, <8 x float> poison, <8 x i32> zeroinitializer
|
||||
%broadcast.splatinsert2049 = insertelement <8 x i8> poison, i8 %1, i32 0
|
||||
%broadcast.splat2050 = shufflevector <8 x i8> %broadcast.splatinsert2049, <8 x i8> poison, <8 x i32> zeroinitializer
|
||||
br label %vector.body1999
|
||||
|
||||
vector.body1999: ; preds = %vector.body1999, %while.84.body.preheader
|
||||
%predphi2026 = select <8 x i1> undef, <8 x float> undef, <8 x float> %broadcast.splat2025
|
||||
%predphi2051 = select <8 x i1> undef, <8 x i8> %broadcast.splat2050, <8 x i8> undef
|
||||
br label %vector.body1999
|
||||
}
|
Loading…
Reference in New Issue
Block a user