1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[ARM] Fix incorrect handling of big-endian vmov.i64

Currently, when the target is big-endian, vmov.i64 reverses the order of the
two 32-bit words of the vector. This is correct only when the underlying
element type is 32-bit: what it should actually do is treat the value as a
vector of the underlying element type and reverse the order of those elements.

Differential Revision: https://reviews.llvm.org/D76515
This commit is contained in:
John Brawn 2020-03-16 17:28:11 +00:00
parent 2f899bd67b
commit 715275dfe2
5 changed files with 130 additions and 51 deletions

View File

@ -6443,9 +6443,10 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
/// immediate" operand (e.g., VMOV). If so, return the encoded value.
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
const SDLoc &dl, EVT &VT, bool is128Bits,
const SDLoc &dl, EVT &VT, EVT VectorVT,
VMOVModImmType type) {
unsigned OpCmode, Imm;
bool is128Bits = VectorVT.is128BitVector();
// SplatBitSize is set to the smallest size that splats the vector, so a
// zero vector will always have SplatBitSize == 8. However, NEON modified
@ -6563,9 +6564,18 @@ static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
ImmMask <<= 1;
}
if (DAG.getDataLayout().isBigEndian())
// swap higher and lower 32 bit word
Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
if (DAG.getDataLayout().isBigEndian()) {
// Reverse the order of elements within the vector.
unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
unsigned Mask = (1 << BytesPerElem) - 1;
unsigned NumElems = 8 / BytesPerElem;
unsigned NewImm = 0;
for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
}
Imm = NewImm;
}
// Op=1, Cmode=1110.
OpCmode = 0x1e;
@ -6658,7 +6668,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
// Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
VMovVT, false, VMOVModImm);
VMovVT, VT, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
@ -6675,7 +6685,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
// Finally, try a VMVN.i32
NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
false, VMVNModImm);
VT, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
@ -7185,10 +7195,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
(ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
// Check if an immediate VMOV works.
EVT VmovVT;
SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VmovVT, VT.is128BitVector(),
VMOVModImm);
SDValue Val =
isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
@ -7198,9 +7207,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Try an immediate VMVN.
uint64_t NegatedImm = (~SplatBits).getZExtValue();
Val = isVMOVModifiedImm(
NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VmovVT, VT.is128BitVector(),
ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@ -12403,8 +12411,7 @@ static SDValue PerformANDCombine(SDNode *N,
EVT VbicVT;
SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VbicVT, VT.is128BitVector(),
OtherModImm);
DAG, dl, VbicVT, VT, OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
@ -12708,10 +12715,9 @@ static SDValue PerformORCombine(SDNode *N,
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VorrVT;
SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VorrVT, VT.is128BitVector(),
OtherModImm);
SDValue Val =
isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));

View File

@ -98,7 +98,7 @@ entry:
define void @conv_v4i16_to_v4f16( <4 x i16> %a, <4 x half>* %store ) {
; CHECK-LABEL: conv_v4i16_to_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 d16, #0xffffffff0000
; CHECK-NEXT: vmov.i64 d16, #0xffff00000000ffff
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vrev64.16 d18, d0
; CHECK-NEXT: vrev64.16 d17, d17

View File

@ -0,0 +1,88 @@
; RUN: llc < %s -mtriple armv7-eabi -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc < %s -mtriple armebv7-eabi -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
; CHECK-LABEL: vmov_i8:
; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
; CHECK-BE: vmov.i64 d0, #0xff{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <8 x i8> whose only non-zero lane is the last one
; (lane 7 = -1, i.e. 0xff); all other lanes are zero.
define arm_aapcs_vfpcc <8 x i8> @vmov_i8() {
ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1>
}
; CHECK-LABEL: vmov_i16_a:
; CHECK-LE: vmov.i64 d0, #0xffff000000000000{{$}}
; CHECK-BE: vmov.i64 d0, #0xffff{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <4 x i16> whose only non-zero lane is the last one
; (lane 3 = -1, i.e. 0xffff).
define arm_aapcs_vfpcc <4 x i16> @vmov_i16_a() {
ret <4 x i16> <i16 0, i16 0, i16 0, i16 -1>
}
; CHECK-LABEL: vmov_i16_b:
; CHECK-LE: vmov.i64 d0, #0xff000000000000{{$}}
; CHECK-BE: vmov.i64 d0, #0xff{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <4 x i16> where only the low byte of the last lane is set
; (lane 3 = 255 = 0x00ff).
define arm_aapcs_vfpcc <4 x i16> @vmov_i16_b() {
ret <4 x i16> <i16 0, i16 0, i16 0, i16 255>
}
; CHECK-LABEL: vmov_i16_c:
; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
; CHECK-BE: vmov.i64 d0, #0xff00{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <4 x i16> where only the high byte of the last lane is set
; (lane 3 = 65280 = 0xff00).
define arm_aapcs_vfpcc <4 x i16> @vmov_i16_c() {
ret <4 x i16> <i16 0, i16 0, i16 0, i16 65280>
}
; CHECK-LABEL: vmov_i32_a:
; CHECK-LE: vmov.i64 d0, #0xffffffff00000000{{$}}
; CHECK-BE: vmov.i64 d0, #0xffffffff{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <2 x i32> whose first lane is zero and whose second lane
; is all ones (-1, i.e. 0xffffffff).
define arm_aapcs_vfpcc <2 x i32> @vmov_i32_a() {
ret <2 x i32> <i32 0, i32 -1>
}
; CHECK-LABEL: vmov_i32_b:
; CHECK-LE: vmov.i64 d0, #0xff00000000{{$}}
; CHECK-BE: vmov.i64 d0, #0xff{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <2 x i32> where only the lowest byte of the second lane
; is set (255 = 0x000000ff).
define arm_aapcs_vfpcc <2 x i32> @vmov_i32_b() {
ret <2 x i32> <i32 0, i32 255>
}
; CHECK-LABEL: vmov_i32_c:
; CHECK-LE: vmov.i64 d0, #0xff0000000000{{$}}
; CHECK-BE: vmov.i64 d0, #0xff00{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <2 x i32> where only the second-lowest byte of the second
; lane is set (65280 = 0x0000ff00).
define arm_aapcs_vfpcc <2 x i32> @vmov_i32_c() {
ret <2 x i32> <i32 0, i32 65280>
}
; CHECK-LABEL: vmov_i32_d:
; CHECK-LE: vmov.i64 d0, #0xff000000000000{{$}}
; CHECK-BE: vmov.i64 d0, #0xff0000{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <2 x i32> where only the second-highest byte of the second
; lane is set (16711680 = 0x00ff0000).
define arm_aapcs_vfpcc <2 x i32> @vmov_i32_d() {
ret <2 x i32> <i32 0, i32 16711680>
}
; CHECK-LABEL: vmov_i32_e:
; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
; CHECK-BE: vmov.i64 d0, #0xff000000{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <2 x i32> where only the highest byte of the second lane
; is set (4278190080 = 0xff000000).
define arm_aapcs_vfpcc <2 x i32> @vmov_i32_e() {
ret <2 x i32> <i32 0, i32 4278190080>
}
; CHECK-LABEL: vmov_i64_a:
; CHECK: vmov.i8 d0, #0xff{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <1 x i64> with every bit set (-1). The value has a single
; element, and one shared check line covers both endiannesses.
define arm_aapcs_vfpcc <1 x i64> @vmov_i64_a() {
ret <1 x i64> <i64 -1>
}
; CHECK-LABEL: vmov_i64_b:
; CHECK: vmov.i64 d0, #0xffff00ff0000ff{{$}}
; CHECK-NEXT: bx lr
; Returns a constant <1 x i64> with an asymmetric byte pattern
; (72056498804490495 = 0x00ffff00ff0000ff). The value has a single element,
; and one shared check line covers both endiannesses.
define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() {
ret <1 x i64> <i64 72056498804490495>
}

View File

@ -219,15 +219,10 @@ define arm_aapcs_vfpcc <4 x i32> @v_movQi32f() nounwind {
}
define arm_aapcs_vfpcc <2 x i64> @v_movQi64() nounwind {
; CHECK-LE-LABEL: v_movQi64:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
; CHECK-LE-NEXT: mov pc, lr
;
; CHECK-BE-LABEL: v_movQi64:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 q0, #0xffffff0000ff
; CHECK-BE-NEXT: mov pc, lr
; CHECK-LABEL: v_movQi64:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
; CHECK-NEXT: mov pc, lr
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}

View File

@ -263,15 +263,10 @@ entry:
}
define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff() {
; CHECKLE-LABEL: mov_int64_ff:
; CHECKLE: @ %bb.0: @ %entry
; CHECKLE-NEXT: vmov.i64 q0, #0xff
; CHECKLE-NEXT: bx lr
;
; CHECKBE-LABEL: mov_int64_ff:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q0, #0xff00000000
; CHECKBE-NEXT: bx lr
; CHECK-LABEL: mov_int64_ff:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q0, #0xff
; CHECK-NEXT: bx lr
entry:
ret <2 x i64> < i64 255, i64 255 >
}
@ -286,15 +281,10 @@ entry:
}
define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff0000ff0000ffff() {
; CHECKLE-LABEL: mov_int64_ff0000ff0000ffff:
; CHECKLE: @ %bb.0: @ %entry
; CHECKLE-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
; CHECKLE-NEXT: bx lr
;
; CHECKBE-LABEL: mov_int64_ff0000ff0000ffff:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q0, #0xffffff0000ff
; CHECKBE-NEXT: bx lr
; CHECK-LABEL: mov_int64_ff0000ff0000ffff:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
; CHECK-NEXT: bx lr
entry:
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
@ -338,7 +328,7 @@ define arm_aapcs_vfpcc <16 x i8> @mov_int64_0f000f0f() {
;
; CHECKBE-LABEL: mov_int64_0f000f0f:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q0, #0xff00ff00ff00
; CHECKBE-NEXT: vmov.i64 q0, #0xff00ff000000ff00
; CHECKBE-NEXT: bx lr
entry:
ret <16 x i8> <i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0>
@ -352,7 +342,7 @@ define arm_aapcs_vfpcc <8 x i16> @mov_int64_ff00ffff() {
;
; CHECKBE-LABEL: mov_int64_ff00ffff:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q0, #0xffffffffffff0000
; CHECKBE-NEXT: vmov.i64 q0, #0xffff0000ffffffff
; CHECKBE-NEXT: bx lr
entry:
ret <8 x i16> <i16 -1, i16 0, i16 -1, i16 -1, i16 -1, i16 0, i16 -1, i16 -1>
@ -494,7 +484,7 @@ define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) {
;
; CHECKBE-LABEL: test:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff00ff0000
; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00
; CHECKBE-NEXT: vrev64.8 q2, q1
; CHECKBE-NEXT: vrev64.8 q1, q0
; CHECKBE-NEXT: vorr q1, q1, q2
@ -514,7 +504,7 @@ define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) {
;
; CHECKBE-LABEL: test2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xffffffffffff
; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff
; CHECKBE-NEXT: vrev64.16 q2, q1
; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vorr q1, q1, q2