[ARM] Fix incorrect handling of big-endian vmov.i64
Currently, when the target is big-endian, vmov.i64 reverses the order of the two 32-bit words of the vector. This is correct only when the underlying element type is 32 bits: what it should actually do is treat the constant as a vector of the underlying element type and reverse those elements.

Differential Revision: https://reviews.llvm.org/D76515
parent 2f899bd67b
commit 715275dfe2
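To illustrate the fix, here is a minimal standalone C++ sketch of the element-reversal the patch performs on the vmov.i64 "byte mask" immediate for big-endian targets. The helper name, the assertion, and the main() driver are illustrative only and are not part of LLVM; the real change lives inside isVMOVModifiedImm, shown in the diff below.

// Sketch: each of the 8 bits of Imm says whether the corresponding byte of the
// 64-bit vmov.i64 value is 0xff. For big-endian output, reverse the immediate
// in groups of (element size / 8) bits, i.e. reverse the vector's elements.
#include <cassert>
#include <cstdio>

static unsigned reverseVmovI64MaskForBE(unsigned Imm, unsigned ElemBits) {
  assert(ElemBits == 8 || ElemBits == 16 || ElemBits == 32 || ElemBits == 64);
  unsigned BytesPerElem = ElemBits / 8;      // mask bits per element
  unsigned Mask = (1u << BytesPerElem) - 1;  // one element's group of mask bits
  unsigned NumElems = 8 / BytesPerElem;      // elements in the 64-bit constant
  unsigned NewImm = 0;
  for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
    unsigned Elem = (Imm >> (ElemNum * BytesPerElem)) & Mask;
    NewImm |= Elem << ((NumElems - ElemNum - 1) * BytesPerElem);
  }
  return NewImm;
}

int main() {
  // <4 x i16> <0, 0, 0, -1>: little-endian mask 0b11000000 (top two bytes set).
  // Reversing the 16-bit elements yields 0b00000011, i.e. vmov.i64 #0xffff on
  // big-endian, whereas the old 32-bit word swap would have produced 0b00001100.
  printf("%#x\n", reverseVmovI64MaskForBE(0xc0, 16)); // prints 0x3
}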
@@ -6443,9 +6443,10 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
 /// immediate" operand (e.g., VMOV). If so, return the encoded value.
 static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
                                  unsigned SplatBitSize, SelectionDAG &DAG,
-                                 const SDLoc &dl, EVT &VT, bool is128Bits,
+                                 const SDLoc &dl, EVT &VT, EVT VectorVT,
                                  VMOVModImmType type) {
   unsigned OpCmode, Imm;
+  bool is128Bits = VectorVT.is128BitVector();
 
   // SplatBitSize is set to the smallest size that splats the vector, so a
   // zero vector will always have SplatBitSize == 8. However, NEON modified
@@ -6563,9 +6564,18 @@ static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
       ImmMask <<= 1;
     }
 
-    if (DAG.getDataLayout().isBigEndian())
-      // swap higher and lower 32 bit word
-      Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+    if (DAG.getDataLayout().isBigEndian()) {
+      // Reverse the order of elements within the vector.
+      unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
+      unsigned Mask = (1 << BytesPerElem) - 1;
+      unsigned NumElems = 8 / BytesPerElem;
+      unsigned NewImm = 0;
+      for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
+        unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
+        NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
+      }
+      Imm = NewImm;
+    }
 
     // Op=1, Cmode=1110.
     OpCmode = 0x1e;
@@ -6658,7 +6668,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
 
   // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
   SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
-                                     VMovVT, false, VMOVModImm);
+                                     VMovVT, VT, VMOVModImm);
   if (NewVal != SDValue()) {
     SDLoc DL(Op);
     SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
@@ -6675,7 +6685,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
 
   // Finally, try a VMVN.i32
   NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
-                             false, VMVNModImm);
+                             VT, VMVNModImm);
   if (NewVal != SDValue()) {
     SDLoc DL(Op);
     SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
@@ -7185,10 +7195,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
         (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
       // Check if an immediate VMOV works.
       EVT VmovVT;
-      SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
-                                      SplatUndef.getZExtValue(), SplatBitSize,
-                                      DAG, dl, VmovVT, VT.is128BitVector(),
-                                      VMOVModImm);
+      SDValue Val =
+          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+                            SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
 
       if (Val.getNode()) {
         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
@@ -7198,9 +7207,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       // Try an immediate VMVN.
       uint64_t NegatedImm = (~SplatBits).getZExtValue();
       Val = isVMOVModifiedImm(
-          NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
-          DAG, dl, VmovVT, VT.is128BitVector(),
-          ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
+          NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
+          VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
       if (Val.getNode()) {
         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@@ -12403,8 +12411,7 @@ static SDValue PerformANDCombine(SDNode *N,
     EVT VbicVT;
     SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
                                     SplatUndef.getZExtValue(), SplatBitSize,
-                                    DAG, dl, VbicVT, VT.is128BitVector(),
-                                    OtherModImm);
+                                    DAG, dl, VbicVT, VT, OtherModImm);
     if (Val.getNode()) {
       SDValue Input =
         DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
@@ -12708,10 +12715,9 @@ static SDValue PerformORCombine(SDNode *N,
       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       EVT VorrVT;
-      SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
-                                      SplatUndef.getZExtValue(), SplatBitSize,
-                                      DAG, dl, VorrVT, VT.is128BitVector(),
-                                      OtherModImm);
+      SDValue Val =
+          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+                            SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
       if (Val.getNode()) {
         SDValue Input =
           DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
@@ -98,7 +98,7 @@ entry:
 define void @conv_v4i16_to_v4f16( <4 x i16> %a, <4 x half>* %store ) {
 ; CHECK-LABEL: conv_v4i16_to_v4f16:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.i64 d16, #0xffffffff0000
+; CHECK-NEXT: vmov.i64 d16, #0xffff00000000ffff
 ; CHECK-NEXT: vldr d17, [r0]
 ; CHECK-NEXT: vrev64.16 d18, d0
 ; CHECK-NEXT: vrev64.16 d17, d17
test/CodeGen/ARM/big-endian-vmov.ll (new file, 88 lines)
@@ -0,0 +1,88 @@
+; RUN: llc < %s -mtriple armv7-eabi -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc < %s -mtriple armebv7-eabi -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+
+; CHECK-LABEL: vmov_i8
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <8 x i8> @vmov_i8() {
+  ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1>
+}
+
+; CHECK-LABEL: vmov_i16_a:
+; CHECK-LE: vmov.i64 d0, #0xffff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xffff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_a() {
+  ret <4 x i16> <i16 0, i16 0, i16 0, i16 -1>
+}
+
+; CHECK-LABEL: vmov_i16_b:
+; CHECK-LE: vmov.i64 d0, #0xff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_b() {
+  ret <4 x i16> <i16 0, i16 0, i16 0, i16 255>
+}
+
+; CHECK-LABEL: vmov_i16_c:
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff00{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_c() {
+  ret <4 x i16> <i16 0, i16 0, i16 0, i16 65280>
+}
+
+; CHECK-LABEL: vmov_i32_a:
+; CHECK-LE: vmov.i64 d0, #0xffffffff00000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xffffffff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_a() {
+  ret <2 x i32> <i32 0, i32 -1>
+}
+
+; CHECK-LABEL: vmov_i32_b:
+; CHECK-LE: vmov.i64 d0, #0xff00000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_b() {
+  ret <2 x i32> <i32 0, i32 255>
+}
+
+; CHECK-LABEL: vmov_i32_c:
+; CHECK-LE: vmov.i64 d0, #0xff0000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff00{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_c() {
+  ret <2 x i32> <i32 0, i32 65280>
+}
+
+; CHECK-LABEL: vmov_i32_d:
+; CHECK-LE: vmov.i64 d0, #0xff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff0000{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_d() {
+  ret <2 x i32> <i32 0, i32 16711680>
+}
+
+; CHECK-LABEL: vmov_i32_e:
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff000000{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_e() {
+  ret <2 x i32> <i32 0, i32 4278190080>
+}
+
+; CHECK-LABEL: vmov_i64_a:
+; CHECK: vmov.i8 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <1 x i64> @vmov_i64_a() {
+  ret <1 x i64> <i64 -1>
+}
+
+; CHECK-LABEL: vmov_i64_b:
+; CHECK: vmov.i64 d0, #0xffff00ff0000ff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() {
+  ret <1 x i64> <i64 72056498804490495>
+}
@@ -219,15 +219,10 @@ define arm_aapcs_vfpcc <4 x i32> @v_movQi32f() nounwind {
 }
 
 define arm_aapcs_vfpcc <2 x i64> @v_movQi64() nounwind {
-; CHECK-LE-LABEL: v_movQi64:
-; CHECK-LE: @ %bb.0:
-; CHECK-LE-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
-; CHECK-LE-NEXT: mov pc, lr
-;
-; CHECK-BE-LABEL: v_movQi64:
-; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 q0, #0xffffff0000ff
-; CHECK-BE-NEXT: mov pc, lr
+; CHECK-LABEL: v_movQi64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
+; CHECK-NEXT: mov pc, lr
   ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
 
@@ -263,15 +263,10 @@ entry:
 }
 
 define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff() {
-; CHECKLE-LABEL: mov_int64_ff:
-; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: vmov.i64 q0, #0xff
-; CHECKLE-NEXT: bx lr
-;
-; CHECKBE-LABEL: mov_int64_ff:
-; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q0, #0xff00000000
-; CHECKBE-NEXT: bx lr
+; CHECK-LABEL: mov_int64_ff:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i64 q0, #0xff
+; CHECK-NEXT: bx lr
 entry:
   ret <2 x i64> < i64 255, i64 255 >
 }
@@ -286,15 +281,10 @@ entry:
 }
 
 define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff0000ff0000ffff() {
-; CHECKLE-LABEL: mov_int64_ff0000ff0000ffff:
-; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
-; CHECKLE-NEXT: bx lr
-;
-; CHECKBE-LABEL: mov_int64_ff0000ff0000ffff:
-; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q0, #0xffffff0000ff
-; CHECKBE-NEXT: bx lr
+; CHECK-LABEL: mov_int64_ff0000ff0000ffff:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
+; CHECK-NEXT: bx lr
 entry:
   ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
@@ -338,7 +328,7 @@ define arm_aapcs_vfpcc <16 x i8> @mov_int64_0f000f0f() {
 ;
 ; CHECKBE-LABEL: mov_int64_0f000f0f:
 ; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q0, #0xff00ff00ff00
+; CHECKBE-NEXT: vmov.i64 q0, #0xff00ff000000ff00
 ; CHECKBE-NEXT: bx lr
 entry:
   ret <16 x i8> <i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0>
@@ -352,7 +342,7 @@ define arm_aapcs_vfpcc <8 x i16> @mov_int64_ff00ffff() {
 ;
 ; CHECKBE-LABEL: mov_int64_ff00ffff:
 ; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q0, #0xffffffffffff0000
+; CHECKBE-NEXT: vmov.i64 q0, #0xffff0000ffffffff
 ; CHECKBE-NEXT: bx lr
 entry:
   ret <8 x i16> <i16 -1, i16 0, i16 -1, i16 -1, i16 -1, i16 0, i16 -1, i16 -1>
@@ -494,7 +484,7 @@ define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) {
 ;
 ; CHECKBE-LABEL: test:
 ; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff00ff0000
+; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00
 ; CHECKBE-NEXT: vrev64.8 q2, q1
 ; CHECKBE-NEXT: vrev64.8 q1, q0
 ; CHECKBE-NEXT: vorr q1, q1, q2
@@ -514,7 +504,7 @@ define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) {
 ;
 ; CHECKBE-LABEL: test2:
 ; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q1, #0xffffffffffff
+; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff
 ; CHECKBE-NEXT: vrev64.16 q2, q1
 ; CHECKBE-NEXT: vrev64.16 q1, q0
 ; CHECKBE-NEXT: vorr q1, q1, q2