Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2025-02-01 05:01:59 +01:00)
[ARM] Lower non-extended small gathers via truncated gathers.
Corollary to 1113e06821e6baffc84b8caf96a28bf62e6d28dc, this allows us to match gathers that don't produce full vector width results. They use an extending gather whose result is truncated back to the original type.
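
For example (an illustrative sketch rather than one of the tests below; the intrinsic manglings and operand values are approximate), a v8i8 gather whose result is not sign- or zero-extended, such as

  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %ptrs, i32 1, <8 x i1> %mask, <8 x i8> undef)

can now be lowered to an extending MVE gather that loads the i8 elements into a full-width <8 x i16> result, followed by a trunc back to the original type, roughly

  %wide = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offsets, i32 8, i32 0, i32 1)
  %gather = trunc <8 x i16> %wide to <8 x i8>

instead of being expanded into a sequence of scalar loads.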
parent d7fd0583bf
commit cd0a482d93
@@ -488,27 +488,44 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
   // The size of the gather was already checked in isLegalTypeAndAlignment;
   // if it was not a full vector width an appropriate extend should follow.
   auto *Extend = Root;
+  bool TruncResult = false;
   if (MemoryTy->getPrimitiveSizeInBits() < 128) {
-    // Only transform gathers with exactly one use
-    if (!I->hasOneUse())
-      return nullptr;
-    // The correct root to replace is not the CallInst itself, but the
-    // instruction which extends it
-    Extend = cast<Instruction>(*I->users().begin());
-    if (isa<SExtInst>(Extend)) {
-      Unsigned = 0;
-    } else if (!isa<ZExtInst>(Extend)) {
-      LLVM_DEBUG(dbgs() << "masked gathers: extend needed but not provided. "
-                        << "Expanding\n");
-      return nullptr;
+    if (I->hasOneUse()) {
+      // If the gather has a single extend of the correct type, use an extending
+      // gather and replace the ext. In which case the correct root to replace
+      // is not the CallInst itself, but the instruction which extends it.
+      Instruction* User = cast<Instruction>(*I->users().begin());
+      if (isa<SExtInst>(User) &&
+          User->getType()->getPrimitiveSizeInBits() == 128) {
+        LLVM_DEBUG(dbgs() << "masked gathers: Incorporating extend: "
+                          << *User << "\n");
+        Extend = User;
+        ResultTy = User->getType();
+        Unsigned = 0;
+      } else if (isa<ZExtInst>(User) &&
+                 User->getType()->getPrimitiveSizeInBits() == 128) {
+        LLVM_DEBUG(dbgs() << "masked gathers: Incorporating extend: "
+                          << *ResultTy << "\n");
+        Extend = User;
+        ResultTy = User->getType();
+      }
     }
-    LLVM_DEBUG(dbgs() << "masked gathers: found an extending gather\n");
-    ResultTy = Extend->getType();
 
+    // If an extend hasn't been found and the type is an integer, create an
+    // extending gather and truncate back to the original type.
+    if (ResultTy->getPrimitiveSizeInBits() < 128 &&
+        ResultTy->isIntOrIntVectorTy()) {
+      ResultTy = ResultTy->getWithNewBitWidth(
+          128 / cast<FixedVectorType>(ResultTy)->getNumElements());
+      TruncResult = true;
+      LLVM_DEBUG(dbgs() << "masked gathers: Small input type, truncing to: "
+                        << *ResultTy << "\n");
+    }
+
     // The final size of the gather must be a full vector width
     if (ResultTy->getPrimitiveSizeInBits() != 128) {
-      LLVM_DEBUG(dbgs() << "masked gathers: extending from the wrong type. "
-                        << "Expanding\n");
+      LLVM_DEBUG(dbgs() << "masked gathers: Extend needed but not provided "
+                           "from the correct type. Expanding\n");
       return nullptr;
     }
   }
@@ -522,18 +539,25 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
 
   Root = Extend;
   Value *Mask = I->getArgOperand(2);
+  Instruction *Load = nullptr;
   if (!match(Mask, m_One()))
-    return Builder.CreateIntrinsic(
+    Load = Builder.CreateIntrinsic(
         Intrinsic::arm_mve_vldr_gather_offset_predicated,
         {ResultTy, BasePtr->getType(), Offsets->getType(), Mask->getType()},
         {BasePtr, Offsets, Builder.getInt32(MemoryTy->getScalarSizeInBits()),
          Builder.getInt32(Scale), Builder.getInt32(Unsigned), Mask});
   else
-    return Builder.CreateIntrinsic(
+    Load = Builder.CreateIntrinsic(
         Intrinsic::arm_mve_vldr_gather_offset,
         {ResultTy, BasePtr->getType(), Offsets->getType()},
         {BasePtr, Offsets, Builder.getInt32(MemoryTy->getScalarSizeInBits()),
          Builder.getInt32(Scale), Builder.getInt32(Unsigned)});
+
+  if (TruncResult) {
+    Load = TruncInst::Create(Instruction::Trunc, Load, MemoryTy);
+    Builder.Insert(Load);
+  }
+  return Load;
 }
 
 Instruction *MVEGatherScatterLowering::lowerScatter(IntrinsicInst *I) {
@@ -18,33 +18,9 @@ entry:
 define arm_aapcs_vfpcc <8 x i8> @unscaled_v8i8_i8(i8* %base, <8 x i8>* %offptr) {
 ; CHECK-LABEL: unscaled_v8i8_i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    vldrb.u32 q0, [r1, #4]
-; CHECK-NEXT:    vadd.i32 q0, q0, r0
-; CHECK-NEXT:    vmov r2, r12, d0
-; CHECK-NEXT:    vmov r3, lr, d1
-; CHECK-NEXT:    vldrb.u32 q0, [r1]
-; CHECK-NEXT:    vadd.i32 q0, q0, r0
-; CHECK-NEXT:    vmov r4, r5, d0
-; CHECK-NEXT:    vmov r0, r1, d1
-; CHECK-NEXT:    ldrb r2, [r2]
-; CHECK-NEXT:    ldrb.w r12, [r12]
-; CHECK-NEXT:    ldrb r3, [r3]
-; CHECK-NEXT:    ldrb.w lr, [lr]
-; CHECK-NEXT:    ldrb r4, [r4]
-; CHECK-NEXT:    ldrb r5, [r5]
-; CHECK-NEXT:    vmov.16 q0[0], r4
-; CHECK-NEXT:    ldrb r0, [r0]
-; CHECK-NEXT:    vmov.16 q0[1], r5
-; CHECK-NEXT:    ldrb r1, [r1]
-; CHECK-NEXT:    vmov.16 q0[2], r0
-; CHECK-NEXT:    vmov.16 q0[3], r1
-; CHECK-NEXT:    vmov.16 q0[4], r2
-; CHECK-NEXT:    vmov.16 q0[5], r12
-; CHECK-NEXT:    vmov.16 q0[6], r3
-; CHECK-NEXT:    vmov.16 q0[7], lr
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    vldrb.u16 q1, [r1]
+; CHECK-NEXT:    vldrb.u16 q0, [r0, q1]
+; CHECK-NEXT:    bx lr
 entry:
   %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
   %offs.zext = zext <8 x i8> %offs to <8 x i32>
@@ -314,15 +314,9 @@ entry:
 define arm_aapcs_vfpcc <4 x i16> @ptr_v4i16(<4 x i16*>* %offptr) {
 ; CHECK-LABEL: ptr_v4i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r0]
-; CHECK-NEXT:    vmov r0, r1, d1
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    ldrh r0, [r0]
-; CHECK-NEXT:    ldrh r2, [r2]
-; CHECK-NEXT:    ldrh r1, [r1]
-; CHECK-NEXT:    ldrh r3, [r3]
-; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vldrh.u32 q0, [r1, q1]
 ; CHECK-NEXT:    bx lr
 entry:
   %offs = load <4 x i16*>, <4 x i16*>* %offptr, align 4
@@ -658,15 +652,9 @@ entry:
 define arm_aapcs_vfpcc <4 x i8> @ptr_v4i8(<4 x i8*>* %offptr) {
 ; CHECK-LABEL: ptr_v4i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r0]
-; CHECK-NEXT:    vmov r0, r1, d1
-; CHECK-NEXT:    vmov r2, r3, d0
-; CHECK-NEXT:    ldrb r0, [r0]
-; CHECK-NEXT:    ldrb r2, [r2]
-; CHECK-NEXT:    ldrb r1, [r1]
-; CHECK-NEXT:    ldrb r3, [r3]
-; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
-; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vldrb.u32 q0, [r1, q1]
 ; CHECK-NEXT:    bx lr
 entry:
   %offs = load <4 x i8*>, <4 x i8*>* %offptr, align 4
@@ -897,33 +885,25 @@ entry:
 define arm_aapcs_vfpcc <8 x i32> @sext_unsigned_unscaled_i8_i8_toi64(i8* %base, <8 x i8>* %offptr) {
 ; CHECK-LABEL: sext_unsigned_unscaled_i8_i8_toi64:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
-; CHECK-NEXT:    vldrb.u32 q0, [r1, #4]
-; CHECK-NEXT:    vadd.i32 q0, q0, r0
-; CHECK-NEXT:    vmov r2, r12, d1
-; CHECK-NEXT:    vmov r3, lr, d0
-; CHECK-NEXT:    vldrb.u32 q0, [r1]
-; CHECK-NEXT:    vadd.i32 q0, q0, r0
-; CHECK-NEXT:    vmov r0, r1, d1
-; CHECK-NEXT:    vmov r4, r5, d0
-; CHECK-NEXT:    ldrb r6, [r2]
-; CHECK-NEXT:    ldrb r3, [r3]
-; CHECK-NEXT:    ldrb.w r12, [r12]
-; CHECK-NEXT:    ldrb.w r2, [lr]
-; CHECK-NEXT:    vmov q1[2], q1[0], r3, r6
-; CHECK-NEXT:    ldrb r0, [r0]
-; CHECK-NEXT:    ldrb r4, [r4]
-; CHECK-NEXT:    vmov q1[3], q1[1], r2, r12
-; CHECK-NEXT:    ldrb r1, [r1]
-; CHECK-NEXT:    vmovlb.s8 q1, q1
-; CHECK-NEXT:    ldrb r5, [r5]
-; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT:    vmovlb.s16 q1, q1
-; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
+; CHECK-NEXT:    vldrb.u16 q0, [r1]
+; CHECK-NEXT:    vldrb.u16 q1, [r0, q0]
+; CHECK-NEXT:    vmov.u16 r0, q1[2]
+; CHECK-NEXT:    vmov.u16 r1, q1[0]
+; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
+; CHECK-NEXT:    vmov.u16 r0, q1[3]
+; CHECK-NEXT:    vmov.u16 r1, q1[1]
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-NEXT:    vmov.u16 r0, q1[6]
+; CHECK-NEXT:    vmov.u16 r1, q1[4]
 ; CHECK-NEXT:    vmovlb.s8 q0, q0
+; CHECK-NEXT:    vmov q2[2], q2[0], r1, r0
+; CHECK-NEXT:    vmov.u16 r0, q1[7]
+; CHECK-NEXT:    vmov.u16 r1, q1[5]
 ; CHECK-NEXT:    vmovlb.s16 q0, q0
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    vmov q2[3], q2[1], r1, r0
+; CHECK-NEXT:    vmovlb.s8 q1, q2
+; CHECK-NEXT:    vmovlb.s16 q1, q1
+; CHECK-NEXT:    bx lr
 entry:
   %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
   %offs.zext = zext <8 x i8> %offs to <8 x i32>