1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86] Avoid using high register trick for test instruction

Summary:
It seems it's main effect is to create addition copies when values are inr register that do not support this trick, which increase register pressure and makes the code bigger.

The main noteworthy regression I was able to observe was pattern of the type (setcc (trunc (and X, C)), 0) where C is such as it would benefit from the hi register trick. To prevent this, a new pattern is added to materialize such pattern using a 32 bits test. This has the added benefit of working with any constant that is materializable as a 32bits immediate, not just the ones that can leverage the high register trick, as demonstrated by the test case in test-shrink.ll using the constant 2049 .

Reviewers: craig.topper, niravd, spatel, hfinkel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D42646

llvm-svn: 323690
This commit is contained in:
Amaury Sechet 2018-01-29 20:54:33 +00:00
parent f20f873912
commit 90c261493f
7 changed files with 29 additions and 82 deletions

View File

@ -3073,67 +3073,33 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
}
// For example, "testl %eax, $2048" to "testb %ah, $8".
if (isShiftedUInt<8, 8>(Mask) &&
(!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
// Shift the immediate right by 8 bits.
SDValue ShiftedImm = CurDAG->getTargetConstant(Mask >> 8, dl, MVT::i8);
SDValue Reg = N0.getOperand(0);
// Extract the h-register.
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
MVT::i8, Reg);
// Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
// target GR8_NOREX registers, so make sure the register class is
// forced.
SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
MVT::i32, Subreg, ShiftedImm);
// Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
CurDAG->RemoveDeadNode(Node);
return;
}
// For example, "testl %eax, $32776" to "testw %ax, $32776".
// NOTE: We only want to form TESTW instructions if optimizing for
// min size. Otherwise we only save one byte and possibly get a length
// changing prefix penalty in the decoders.
if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 &&
(!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16);
SDValue Reg = N0.getOperand(0);
// Extract the 16-bit subregister.
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
MVT::i16, Reg);
// Emit a testw.
SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
Subreg, Imm);
// Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
CurDAG->RemoveDeadNode(Node);
return;
}
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
if (isUInt<32>(Mask) && N0.getValueType() == MVT::i64 &&
if (isUInt<32>(Mask) &&
(!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
MVT VT = MVT::i32;
int SubRegOp = X86::sub_32bit;
unsigned Op = X86::TEST32ri;
// For example, "testl %eax, $32776" to "testw %ax, $32776".
// NOTE: We only want to form TESTW instructions if optimizing for
// min size. Otherwise we only save one byte and possibly get a length
// changing prefix penalty in the decoders.
if (OptForMinSize && isUInt<16>(Mask) &&
(!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
VT = MVT::i16;
SubRegOp = X86::sub_16bit;
Op = X86::TEST16ri;
}
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
SDValue Reg = N0.getOperand(0);
// Extract the 32-bit subregister.
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
MVT::i32, Reg);
// Extract the subregister if necessary.
if (N0.getValueType() != VT)
Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
// Emit a testl.
SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
Subreg, Imm);
// Emit a testl or testw.
SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
// Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),

View File

@ -1257,14 +1257,6 @@ let isCompare = 1 in {
def TEST32mi : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>;
let Predicates = [In64BitMode] in
def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>;
// When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
// register class is constrained to GR8_NOREX. This pseudo is explicitly
// marked side-effect free, since it doesn't have an isel pattern like
// other test instructions.
let isPseudo = 1, hasSideEffects = 0 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
"", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
} // Defs = [EFLAGS]
def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL,

View File

@ -8018,9 +8018,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::VMOVUPSZ256mr_NOVLX:
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
case X86::TEST8ri_NOREX:
MI.setDesc(get(X86::TEST8ri));
return true;
case X86::MOV32ri64:
MI.setDesc(get(X86::MOV32ri));
return true;

View File

@ -86,7 +86,6 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::TEST16mr:
case X86::TEST32mr:
case X86::TEST64mr:
case X86::TEST8ri_NOREX:
case X86::AND16i16:
case X86::AND16ri:
case X86::AND16ri8:

View File

@ -484,8 +484,7 @@ no:
define void @truncand32(i16 inreg %x) nounwind {
; CHECK-LINUX64-LABEL: truncand32:
; CHECK-LINUX64: # %bb.0:
; CHECK-LINUX64-NEXT: andl $2049, %edi # imm = 0x801
; CHECK-LINUX64-NEXT: testw %di, %di
; CHECK-LINUX64-NEXT: testl $2049, %edi # imm = 0x801
; CHECK-LINUX64-NEXT: je .LBB11_1
; CHECK-LINUX64-NEXT: # %bb.2: # %no
; CHECK-LINUX64-NEXT: retq
@ -498,8 +497,7 @@ define void @truncand32(i16 inreg %x) nounwind {
; CHECK-WIN32-64-LABEL: truncand32:
; CHECK-WIN32-64: # %bb.0:
; CHECK-WIN32-64-NEXT: subq $40, %rsp
; CHECK-WIN32-64-NEXT: andl $2049, %ecx # imm = 0x801
; CHECK-WIN32-64-NEXT: testw %cx, %cx
; CHECK-WIN32-64-NEXT: testl $2049, %ecx # imm = 0x801
; CHECK-WIN32-64-NEXT: je .LBB11_1
; CHECK-WIN32-64-NEXT: # %bb.2: # %no
; CHECK-WIN32-64-NEXT: addq $40, %rsp
@ -511,8 +509,7 @@ define void @truncand32(i16 inreg %x) nounwind {
;
; CHECK-X86-LABEL: truncand32:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: andl $2049, %eax # imm = 0x801
; CHECK-X86-NEXT: testw %ax, %ax
; CHECK-X86-NEXT: testl $2049, %eax # imm = 0x801
; CHECK-X86-NEXT: je .LBB11_1
; CHECK-X86-NEXT: # %bb.2: # %no
; CHECK-X86-NEXT: retl

View File

@ -6,9 +6,8 @@
define i32 @check_flag(i32 %flags, ...) nounwind {
; CHECK-LABEL: check_flag:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %edi, %ecx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb $2, %ch
; CHECK-NEXT: testl $512, %edi # imm = 0x200
; CHECK-NEXT: je .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl $1, %eax

View File

@ -8,9 +8,7 @@ target triple = "x86_64-apple-macosx10.10.0"
define i32 @check_flag(i32 %flags, ...) nounwind {
; CHECK-LABEL: check_flag:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $48, %rsp
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: subq $56, %rsp
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je LBB0_2
; CHECK-NEXT: ## %bb.1: ## %entry
@ -29,7 +27,7 @@ define i32 @check_flag(i32 %flags, ...) nounwind {
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb $2, %bh
; CHECK-NEXT: testl $512, %edi ## imm = 0x200
; CHECK-NEXT: je LBB0_4
; CHECK-NEXT: ## %bb.3: ## %if.then
; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
@ -40,8 +38,7 @@ define i32 @check_flag(i32 %flags, ...) nounwind {
; CHECK-NEXT: movl $8, 0
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: LBB0_4: ## %if.end
; CHECK-NEXT: addq $48, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: retq
entry:
%and = and i32 %flags, 512