
[x86] allow 8-bit adds to be promoted by convertToThreeAddress() to form LEA

This extends the code that handles 16-bit add promotion to form LEA so that it also
allows 8-bit adds. That lets us combine add ops with the surrounding register moves
and save some instructions. This is another step towards allowing add truncation in
the generic DAGCombiner (see D54640).

Differential Revision: https://reviews.llvm.org/D55494

llvm-svn: 348946
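To make the effect concrete, here is a minimal before/after taken from the first test diff below (the registers are simply what the allocator picked there, and the "# kill" lines in the checks are liveness annotations, not instructions). Previously an i8 add needed a register move plus an 8-bit add:

    movl %esi, %eax
    addb %dil, %al

With this change the operands are promoted to their 64-bit super-registers, so the move and the add fold into a single three-address LEA:

    leal (%rsi,%rdi), %eax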
Sanjay Patel 2018-12-12 17:58:27 +00:00
parent 6d994471cc
commit cfc854315e
14 changed files with 70 additions and 57 deletions

View File

@@ -913,8 +913,8 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR in {
def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
@@ -931,9 +931,9 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;

View File

@@ -797,6 +797,13 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
LiveVariables *LV) const {
// We handle 8-bit adds and various 16-bit opcodes in the switch below.
bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri);
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
*RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
"Unexpected type for LEA transform");
// TODO: For a 32-bit target, we need to adjust the LEA variables with
// something like this:
// Opcode = X86::LEA32r;
@@ -807,13 +814,12 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
if (!Subtarget.is64Bit())
return nullptr;
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
unsigned Opcode = X86::LEA64_32r;
unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we will be shifting and then extracting the lower 16-bits.
// we will be shifting and then extracting the lower 8/16-bits.
// This has the potential to cause partial register stall. e.g.
// movw (%rbp,%rcx,2), %dx
// leal -65(%rdx), %esi
@@ -824,11 +830,12 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned Src = MI.getOperand(1).getReg();
bool IsDead = MI.getOperand(0).isDead();
bool IsKill = MI.getOperand(1).isKill();
unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit;
assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
MachineInstr *InsMI =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(InRegLEA, RegState::Define, X86::sub_16bit)
.addReg(InRegLEA, RegState::Define, SubReg)
.addReg(Src, getKillRegState(IsKill));
MachineInstrBuilder MIB =
@@ -847,12 +854,14 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
case X86::DEC16r:
addRegOffset(MIB, InRegLEA, true, -1);
break;
case X86::ADD8ri:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
break;
case X86::ADD8rr:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
unsigned Src2 = MI.getOperand(2).getReg();
@@ -861,7 +870,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned InRegLEA2 = 0;
MachineInstr *InsMI2 = nullptr;
if (Src == Src2) {
// ADD16rr killed %reg1028, %reg1028
// ADD8rr/ADD16rr killed %reg1028, %reg1028
// just a single insert_subreg.
addRegReg(MIB, InRegLEA, true, InRegLEA, false);
} else {
@@ -870,10 +879,10 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
else
InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we will be shifting and then extracting the lower 16-bits.
// we will be shifting and then extracting the lower 8/16-bits.
BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(InRegLEA2, RegState::Define, X86::sub_16bit)
.addReg(InRegLEA2, RegState::Define, SubReg)
.addReg(Src2, getKillRegState(IsKill2));
addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
}
@@ -887,7 +896,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
MachineInstr *ExtMI =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(Dest, RegState::Define | getDeadRegState(IsDead))
.addReg(OutRegLEA, RegState::Kill, X86::sub_16bit);
.addReg(OutRegLEA, RegState::Kill, SubReg);
if (LV) {
// Update live variables.
@@ -1084,6 +1093,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
break;
}
case X86::ADD8rr:
case X86::ADD16rr:
case X86::ADD16rr_DB:
return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
@@ -1119,6 +1129,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
NewMI = addOffset(MIB, MI.getOperand(2));
break;
}
case X86::ADD8ri:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:

View File

@@ -584,9 +584,9 @@ protected:
const MachineOperand *&Destination) const override;
private:
/// This is a helper for convertToThreeAddress for 16-bit instructions.
/// This is a helper for convertToThreeAddress for 8- and 16-bit instructions.
/// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
/// super-register and then truncating back down to a 16-bit sub-register.
/// super-register and then truncating back down to an 8/16-bit sub-register.
MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
MachineInstr &MI,
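In MIR terms, the helper now builds the same sequence for 8-bit operands as for 16-bit ones, differing only in the sub-register index (sub_8bit vs. sub_16bit). A rough sketch for the immediate form (ADD8ri), mirroring the BuildMI calls in the X86InstrInfo.cpp hunk above rather than verbatim compiler output, with illustrative virtual register names:

    %InRegLEA:gr64_nosp = IMPLICIT_DEF                             ; OK: only the low bits are used
    %InRegLEA.sub_8bit = COPY %Src                                 ; insert the 8-bit operand
    %OutRegLEA:gr32 = LEA64_32r %InRegLEA, 1, $noreg, Imm, $noreg  ; the add, as a 3-address LEA
    %Dest:gr8 = COPY %OutRegLEA.sub_8bit                           ; truncate back down to 8 bits

The register-register forms (ADD8rr/ADD16rr) insert the second source into its own promoted register the same way and use it as the LEA index operand.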

View File

@@ -57,8 +57,9 @@ define i16 @test_add_i16(i16 %arg1, i16 %arg2) {
define i8 @test_add_i8(i8 %arg1, i8 %arg2) {
; X64-LABEL: test_add_i8:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: addb %dil, %al
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: leal (%rsi,%rdi), %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;

View File

@@ -11,8 +11,9 @@ define i16 @test_shl_i4(i16 %v, i16 %a, i16 %b) {
; X64-LABEL: test_shl_i4:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: addb %sil, %cl
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: # kill: def $edx killed $edx def $rdx
; X64-NEXT: leal (%rdx,%rsi), %ecx
; X64-NEXT: andb $15, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shlb %cl, %al

View File

@@ -147,8 +147,8 @@ define i8 @test_shl_i8_imm(i32 %arg1) {
define i8 @test_shl_i8_imm1(i32 %arg1) {
; X64-LABEL: test_shl_i8_imm1:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: addb %al, %al
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (%rdi,%rdi), %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%a = trunc i32 %arg1 to i8

View File

@@ -43,9 +43,10 @@ define i16 @test_movw(i16 %a0) {
define i8 @test_movb_hreg(i16 %a0) {
; X64-LABEL: test_movb_hreg:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl $8, %eax
; X64-NEXT: addb %dil, %al
; X64-NEXT: leal (%rax,%rdi), %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;

View File

@@ -358,9 +358,9 @@ define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
;
; X64-LABEL: const_shift_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: shrb $7, %sil
; X64-NEXT: addb %al, %al
; X64-NEXT: leal (%rdi,%rdi), %eax
; X64-NEXT: orb %sil, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq

View File

@@ -21,10 +21,10 @@ define i8 @test_i8(i8 %a) nounwind {
;
; X64-LABEL: test_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: sarb $7, %cl
; X64-NEXT: addb %cl, %al
; X64-NEXT: leal (%rdi,%rcx), %eax
; X64-NEXT: xorb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq

View File

@@ -14,8 +14,8 @@ define i8 @test_mul_by_1(i8 %x) {
define i8 @test_mul_by_2(i8 %x) {
; X64-LABEL: test_mul_by_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: addb %al, %al
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (%rdi,%rdi), %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%m = mul i8 %x, 2

View File

@@ -25,6 +25,7 @@ define i8 @cnt8(i8 %x) nounwind readnone {
;
; X64-LABEL: cnt8:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrb %al
; X64-NEXT: andb $85, %al
@@ -36,8 +37,9 @@ define i8 @cnt8(i8 %x) nounwind readnone {
; X64-NEXT: addb %al, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrb $4, %al
; X64-NEXT: addb %dil, %al
; X64-NEXT: leal (%rax,%rdi), %eax
; X64-NEXT: andb $15, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-POPCNT-LABEL: cnt8:

View File

@@ -4,8 +4,8 @@
define i2 @f(i32 %arg) {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: addb %al, %al
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: leal (%rdi,%rdi), %eax
; CHECK-NEXT: orb $1, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq

View File

@@ -642,9 +642,9 @@ define i32 @rotate_demanded_bits_3(i32, i32) {
;
; X64-LABEL: rotate_demanded_bits_3:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: addb %cl, %cl
; X64-NEXT: leal (%rsi,%rsi), %ecx
; X64-NEXT: andb $30, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %eax

View File

@@ -19,29 +19,28 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: incl %esi
; ILP-NEXT: addb %sil, %sil
; ILP-NEXT: orb $1, %sil
; ILP-NEXT: movl $1, %r9d
; ILP-NEXT: movl $1, %r10d
; ILP-NEXT: xorl %r14d, %r14d
; ILP-NEXT: movl %esi, %ecx
; ILP-NEXT: shldq %cl, %r9, %r14
; ILP-NEXT: shldq %cl, %r10, %r14
; ILP-NEXT: movl $1, %edx
; ILP-NEXT: shlq %cl, %rdx
; ILP-NEXT: movl %esi, %r11d
; ILP-NEXT: addb $-128, %r11b
; ILP-NEXT: movb $-128, %r10b
; ILP-NEXT: leal -128(%rsi), %r9d
; ILP-NEXT: movb $-128, %r11b
; ILP-NEXT: xorl %ebx, %ebx
; ILP-NEXT: movl %r11d, %ecx
; ILP-NEXT: shldq %cl, %r9, %rbx
; ILP-NEXT: movl %r9d, %ecx
; ILP-NEXT: shldq %cl, %r10, %rbx
; ILP-NEXT: testb $64, %sil
; ILP-NEXT: cmovneq %rdx, %r14
; ILP-NEXT: cmovneq %r8, %rdx
; ILP-NEXT: movl $1, %edi
; ILP-NEXT: shlq %cl, %rdi
; ILP-NEXT: subb %sil, %r10b
; ILP-NEXT: movl %r10d, %ecx
; ILP-NEXT: shrdq %cl, %r8, %r9
; ILP-NEXT: testb $64, %r10b
; ILP-NEXT: cmovneq %r8, %r9
; ILP-NEXT: subb %sil, %r11b
; ILP-NEXT: movl %r11d, %ecx
; ILP-NEXT: shrdq %cl, %r8, %r10
; ILP-NEXT: testb $64, %r11b
; ILP-NEXT: cmovneq %r8, %r10
; ILP-NEXT: testb $64, %r9b
; ILP-NEXT: cmovneq %rdi, %rbx
; ILP-NEXT: cmovneq %r8, %rdi
; ILP-NEXT: testb %sil, %sil
@@ -52,7 +51,7 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: cmovnsq %r8, %rbx
; ILP-NEXT: cmoveq %r8, %rbx
; ILP-NEXT: movq %rbx, 24(%rax)
; ILP-NEXT: cmovnsq %r9, %rdi
; ILP-NEXT: cmovnsq %r10, %rdi
; ILP-NEXT: cmoveq %r8, %rdi
; ILP-NEXT: movq %rdi, 16(%rax)
; ILP-NEXT: popq %rbx
@@ -76,7 +75,7 @@ define i256 @test1(i256 %a) nounwind {
; HYBRID-NEXT: xorl %r10d, %r10d
; HYBRID-NEXT: movl %esi, %ecx
; HYBRID-NEXT: shldq %cl, %r11, %r10
; HYBRID-NEXT: addb $-128, %cl
; HYBRID-NEXT: leal -128(%rsi), %ecx
; HYBRID-NEXT: xorl %edi, %edi
; HYBRID-NEXT: shldq %cl, %r11, %rdi
; HYBRID-NEXT: movl $1, %edx
@@ -119,7 +118,7 @@ define i256 @test1(i256 %a) nounwind {
; BURR-NEXT: xorl %r10d, %r10d
; BURR-NEXT: movl %esi, %ecx
; BURR-NEXT: shldq %cl, %r11, %r10
; BURR-NEXT: addb $-128, %cl
; BURR-NEXT: leal -128(%rsi), %ecx
; BURR-NEXT: xorl %edi, %edi
; BURR-NEXT: shldq %cl, %r11, %rdi
; BURR-NEXT: movl $1, %edx
@@ -160,8 +159,7 @@ define i256 @test1(i256 %a) nounwind {
; SRC-NEXT: shrdq %cl, %r8, %r10
; SRC-NEXT: testb $64, %cl
; SRC-NEXT: cmovneq %r8, %r10
; SRC-NEXT: movl %esi, %r9d
; SRC-NEXT: addb $-128, %r9b
; SRC-NEXT: leal -128(%rsi), %r9d
; SRC-NEXT: xorl %edx, %edx
; SRC-NEXT: movl %r9d, %ecx
; SRC-NEXT: shldq %cl, %rdi, %rdx
@@ -215,13 +213,12 @@ define i256 @test1(i256 %a) nounwind {
; LIN-NEXT: cmovneq %rdx, %rdi
; LIN-NEXT: cmovsq %r9, %rdi
; LIN-NEXT: movq %rdi, 8(%rax)
; LIN-NEXT: movl %esi, %edx
; LIN-NEXT: addb $-128, %dl
; LIN-NEXT: movl $1, %r10d
; LIN-NEXT: movl %edx, %ecx
; LIN-NEXT: shlq %cl, %r10
; LIN-NEXT: testb $64, %dl
; LIN-NEXT: movq %r10, %rdi
; LIN-NEXT: leal -128(%rsi), %r10d
; LIN-NEXT: movl $1, %edx
; LIN-NEXT: movl %r10d, %ecx
; LIN-NEXT: shlq %cl, %rdx
; LIN-NEXT: testb $64, %r10b
; LIN-NEXT: movq %rdx, %rdi
; LIN-NEXT: cmovneq %r9, %rdi
; LIN-NEXT: movb $-128, %cl
; LIN-NEXT: subb %sil, %cl
@@ -233,9 +230,9 @@ define i256 @test1(i256 %a) nounwind {
; LIN-NEXT: cmoveq %r9, %rsi
; LIN-NEXT: movq %rsi, 16(%rax)
; LIN-NEXT: xorl %esi, %esi
; LIN-NEXT: movl %edx, %ecx
; LIN-NEXT: movl %r10d, %ecx
; LIN-NEXT: shldq %cl, %r8, %rsi
; LIN-NEXT: cmovneq %r10, %rsi
; LIN-NEXT: cmovneq %rdx, %rsi
; LIN-NEXT: cmovnsq %r9, %rsi
; LIN-NEXT: cmoveq %r9, %rsi
; LIN-NEXT: movq %rsi, 24(%rax)