1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[x86] refine conditions for immediate hoisting to save code-size

As shown in PR46237:
https://bugs.llvm.org/show_bug.cgi?id=46237

The size-savings win for hoisting an 8-bit ALU immediate (intentionally
excluding store constants) requires extreme conditions; it may not even
be possible when including REX prefix bytes on x86-64.

I did draft a version of this patch that included use counts after the
loop, but I suspect that accounting is not working as expected. I think
that is because the number of constant uses are changing as we select
instructions (for example as we transform shl/add into LEA).

Differential Revision: https://reviews.llvm.org/D81468
This commit is contained in:
Sanjay Patel 2020-06-09 15:43:34 -04:00
parent 0f6cb1fbd8
commit 5fca27122d
4 changed files with 30 additions and 31 deletions

View File

@ -364,9 +364,10 @@ namespace {
if (User->getNumOperands() != 2) if (User->getNumOperands() != 2)
continue; continue;
// If this can match to INC/DEC, don't count it as a use. // If this is a sign-extended 8-bit integer immediate used in an ALU
if (User->getOpcode() == ISD::ADD && // instruction, there is probably an opcode encoding to save space.
(isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0)))) auto *C = dyn_cast<ConstantSDNode>(N);
if (C && isInt<8>(C->getSExtValue()))
continue; continue;
// Immediates that are used for offsets as part of stack // Immediates that are used for offsets as part of stack

View File

@ -12,16 +12,16 @@
@i = common global i32 0, align 4 @i = common global i32 0, align 4
; Test -Os to make sure immediates with multiple users don't get pulled in to ; Test -Os to make sure immediates with multiple users don't get pulled in to
; instructions. ; instructions (8-bit immediates are exceptions).
define i32 @foo() optsize { define i32 @foo() optsize {
; X86-LABEL: foo: ; X86-LABEL: foo:
; X86: # %bb.0: # %entry ; X86: # %bb.0: # %entry
; X86-NEXT: movl $1234, %eax # imm = 0x4D2 ; X86-NEXT: movl $1234, %eax # imm = 0x4D2
; X86-NEXT: movl %eax, a ; X86-NEXT: movl %eax, a
; X86-NEXT: movl %eax, b ; X86-NEXT: movl %eax, b
; X86-NEXT: movl $12, %eax ; X86-NEXT: movl $12, c
; X86-NEXT: movl %eax, c ; X86-NEXT: cmpl $12, e
; X86-NEXT: cmpl %eax, e
; X86-NEXT: jne .LBB0_2 ; X86-NEXT: jne .LBB0_2
; X86-NEXT: # %bb.1: # %if.then ; X86-NEXT: # %bb.1: # %if.then
; X86-NEXT: movl $1, x ; X86-NEXT: movl $1, x
@ -38,9 +38,8 @@ define i32 @foo() optsize {
; X64-NEXT: movl $1234, %eax # imm = 0x4D2 ; X64-NEXT: movl $1234, %eax # imm = 0x4D2
; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip)
; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip)
; X64-NEXT: movl $12, %eax ; X64-NEXT: movl $12, {{.*}}(%rip)
; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: cmpl $12, {{.*}}(%rip)
; X64-NEXT: cmpl %eax, {{.*}}(%rip)
; X64-NEXT: jne .LBB0_2 ; X64-NEXT: jne .LBB0_2
; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: movl $1, {{.*}}(%rip) ; X64-NEXT: movl $1, {{.*}}(%rip)
@ -74,16 +73,16 @@ if.end: ; preds = %if.then, %entry
} }
; Test PGSO to make sure immediates with multiple users don't get pulled in to ; Test PGSO to make sure immediates with multiple users don't get pulled in to
; instructions. ; instructions (8-bit immediates are exceptions).
define i32 @foo_pgso() !prof !14 { define i32 @foo_pgso() !prof !14 {
; X86-LABEL: foo_pgso: ; X86-LABEL: foo_pgso:
; X86: # %bb.0: # %entry ; X86: # %bb.0: # %entry
; X86-NEXT: movl $1234, %eax # imm = 0x4D2 ; X86-NEXT: movl $1234, %eax # imm = 0x4D2
; X86-NEXT: movl %eax, a ; X86-NEXT: movl %eax, a
; X86-NEXT: movl %eax, b ; X86-NEXT: movl %eax, b
; X86-NEXT: movl $12, %eax ; X86-NEXT: movl $12, c
; X86-NEXT: movl %eax, c ; X86-NEXT: cmpl $12, e
; X86-NEXT: cmpl %eax, e
; X86-NEXT: jne .LBB1_2 ; X86-NEXT: jne .LBB1_2
; X86-NEXT: # %bb.1: # %if.then ; X86-NEXT: # %bb.1: # %if.then
; X86-NEXT: movl $1, x ; X86-NEXT: movl $1, x
@ -100,9 +99,8 @@ define i32 @foo_pgso() !prof !14 {
; X64-NEXT: movl $1234, %eax # imm = 0x4D2 ; X64-NEXT: movl $1234, %eax # imm = 0x4D2
; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip)
; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip)
; X64-NEXT: movl $12, %eax ; X64-NEXT: movl $12, {{.*}}(%rip)
; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: cmpl $12, {{.*}}(%rip)
; X64-NEXT: cmpl %eax, {{.*}}(%rip)
; X64-NEXT: jne .LBB1_2 ; X64-NEXT: jne .LBB1_2
; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: movl $1, {{.*}}(%rip) ; X64-NEXT: movl $1, {{.*}}(%rip)

View File

@ -5,13 +5,13 @@
; 32-bit immediates are merged for code size savings. ; 32-bit immediates are merged for code size savings.
; Immediates with multiple users should not be pulled into instructions when ; Immediates with multiple users should not be pulled into instructions when
; optimizing for code size. ; optimizing for code size (but 8-bit immediates are exceptions).
define i1 @imm_multiple_users(i64 %a, i64* %b) optsize { define i1 @imm_multiple_users(i64 %a, i64* %b) optsize {
; CHECK-LABEL: imm_multiple_users: ; CHECK-LABEL: imm_multiple_users:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: movq $-1, %rax ; CHECK-NEXT: movq $-1, (%rsi)
; CHECK-NEXT: movq %rax, (%rsi) ; CHECK-NEXT: cmpq $-1, %rdi
; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: sete %al ; CHECK-NEXT: sete %al
; CHECK-NEXT: retq ; CHECK-NEXT: retq
store i64 -1, i64* %b, align 8 store i64 -1, i64* %b, align 8
@ -22,9 +22,8 @@ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize {
define i1 @imm_multiple_users_pgso(i64 %a, i64* %b) !prof !14 { define i1 @imm_multiple_users_pgso(i64 %a, i64* %b) !prof !14 {
; CHECK-LABEL: imm_multiple_users_pgso: ; CHECK-LABEL: imm_multiple_users_pgso:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: movq $-1, %rax ; CHECK-NEXT: movq $-1, (%rsi)
; CHECK-NEXT: movq %rax, (%rsi) ; CHECK-NEXT: cmpq $-1, %rdi
; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: sete %al ; CHECK-NEXT: sete %al
; CHECK-NEXT: retq ; CHECK-NEXT: retq
store i64 -1, i64* %b, align 8 store i64 -1, i64* %b, align 8

View File

@ -14,12 +14,14 @@ define i1 @foo(i32 %i) optsize {
ret i1 %cmp ret i1 %cmp
} }
; 8-bit ALU immediates probably have small encodings.
; We do not want to hoist the constant into a register here.
define zeroext i1 @g(i32 %x) optsize { define zeroext i1 @g(i32 %x) optsize {
; CHECK-LABEL: g: ; CHECK-LABEL: g:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: orl %eax, %edi ; CHECK-NEXT: cmpl $1, %edi
; CHECK-NEXT: cmpl %eax, %edi
; CHECK-NEXT: sete %al ; CHECK-NEXT: sete %al
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%t0 = or i32 %x, 1 %t0 = or i32 %x, 1
@ -27,7 +29,7 @@ define zeroext i1 @g(i32 %x) optsize {
ret i1 %t1 ret i1 %t1
} }
; 8-bit immediates probably have small encodings. ; 8-bit ALU immediates probably have small encodings.
; We do not want to hoist the constant into a register here. ; We do not want to hoist the constant into a register here.
define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize {
@ -36,9 +38,8 @@ define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize {
; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: shll $6, %eax ; CHECK-NEXT: shll $6, %eax
; CHECK-NEXT: movzbl %al, %ecx ; CHECK-NEXT: movzbl %al, %ecx
; CHECK-NEXT: movl $7, %eax ; CHECK-NEXT: andl $7, %esi
; CHECK-NEXT: andq %rax, %rsi ; CHECK-NEXT: andl $7, %edx
; CHECK-NEXT: andq %rax, %rdx
; CHECK-NEXT: leaq (%rdx,%rsi,8), %rax ; CHECK-NEXT: leaq (%rdx,%rsi,8), %rax
; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: orq %rcx, %rax
; CHECK-NEXT: retq ; CHECK-NEXT: retq