1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[InterleaveAccess] Copy fast math flags when adjusting binary operators in interleave access pass

The Interleave Access pass will convert shuffle(binop(load, load)) to
binop(shuffle(load), shuffle(load)), in order to create more
interleaving load patterns (VLD2/3/4) that might have been messed up by
instcombine. As shown in D104247 we were missing copying IR flags to the
new instruction though, which should just be kept the same as the
original instruction.

Differential Revision: https://reviews.llvm.org/D104255
This commit is contained in:
David Green 2021-06-17 09:53:33 +01:00
parent a8b1d6117d
commit 751ee64aee
4 changed files with 31 additions and 31 deletions

View File

@ -248,11 +248,11 @@ public:
}
#include "llvm/IR/Instruction.def"
static BinaryOperator *CreateWithCopiedFlags(BinaryOps Opc,
Value *V1, Value *V2,
Instruction *CopyO,
const Twine &Name = "") {
BinaryOperator *BO = Create(Opc, V1, V2, Name);
static BinaryOperator *
CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO,
const Twine &Name = "",
Instruction *InsertBefore = nullptr) {
BinaryOperator *BO = Create(Opc, V1, V2, Name, InsertBefore);
BO->copyIRFlags(CopyO);
return BO;
}

View File

@ -408,8 +408,8 @@ bool InterleavedAccess::replaceBinOpShuffles(
auto *NewSVI2 = new ShuffleVectorInst(
BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
SVI->getName(), SVI);
Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2,
BI->getName(), SVI);
BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI);
SVI->replaceAllUsesWith(NewBI);
LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
<< "\n With : " << *NewSVI1 << "\n And : "

View File

@ -13,8 +13,8 @@ define <4 x float> @vld2(<8 x float>* %pSrc) {
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]]
; CHECK-NEXT: ret <4 x float> [[L6]]
;
@ -39,10 +39,10 @@ define <4 x float> @vld3(<12 x float>* %pSrc) {
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]]
; CHECK-NEXT: [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]]
; CHECK-NEXT: ret <4 x float> [[L9]]
;
@ -72,11 +72,11 @@ define <4 x float> @vld4(<16 x float>* %pSrc) {
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]]
; CHECK-NEXT: [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]]
; CHECK-NEXT: ret <4 x float> [[L12]]
;
@ -106,8 +106,8 @@ define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
; CHECK-NEXT: [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1
; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]]
; CHECK-NEXT: ret <4 x float> [[L8]]
;
@ -133,7 +133,7 @@ define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
; CHECK-NEXT: [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1
; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]]
; CHECK-NEXT: ret <4 x float> [[L8]]

View File

@ -13,8 +13,8 @@ define <4 x float> @vld2(<8 x float>* %pSrc) {
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]]
; CHECK-NEXT: ret <4 x float> [[L6]]
;
@ -39,10 +39,10 @@ define <4 x float> @vld3(<12 x float>* %pSrc) {
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]]
; CHECK-NEXT: [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]]
; CHECK-NEXT: ret <4 x float> [[L9]]
;
@ -72,11 +72,11 @@ define <4 x float> @vld4(<16 x float>* %pSrc) {
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]]
; CHECK-NEXT: [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]]
; CHECK-NEXT: ret <4 x float> [[L12]]
;
@ -106,8 +106,8 @@ define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
; CHECK-NEXT: [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1
; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]]
; CHECK-NEXT: ret <4 x float> [[L8]]
;
@ -133,7 +133,7 @@ define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
; CHECK-NEXT: [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1
; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]]
; CHECK-NEXT: ret <4 x float> [[L8]]