From 5fca27122dbb0494261a8d9f51642e77e1927d75 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 9 Jun 2020 15:43:34 -0400 Subject: [PATCH] [x86] refine conditions for immediate hoisting to save code-size As shown in PR46237: https://bugs.llvm.org/show_bug.cgi?id=46237 The size-savings win for hoisting an 8-bit ALU immediate (intentionally excluding store constants) requires extreme conditions; it may not even be possible when including REX prefix bytes on x86-64. I did draft a version of this patch that included use counts after the loop, but I suspect that accounting is not working as expected. I think that is because the number of constant uses are changing as we select instructions (for example as we transform shl/add into LEA). Differential Revision: https://reviews.llvm.org/D81468 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 7 ++++--- test/CodeGen/X86/immediate_merging.ll | 26 ++++++++++++------------- test/CodeGen/X86/immediate_merging64.ll | 13 ++++++------- test/CodeGen/X86/pr27202.ll | 15 +++++++------- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 2171be29391..fadcb173cd4 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -364,9 +364,10 @@ namespace { if (User->getNumOperands() != 2) continue; - // If this can match to INC/DEC, don't count it as a use. - if (User->getOpcode() == ISD::ADD && - (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0)))) + // If this is a sign-extended 8-bit integer immediate used in an ALU + // instruction, there is probably an opcode encoding to save space. + auto *C = dyn_cast(N); + if (C && isInt<8>(C->getSExtValue())) continue; // Immediates that are used for offsets as part of stack diff --git a/test/CodeGen/X86/immediate_merging.ll b/test/CodeGen/X86/immediate_merging.ll index 1bed1014f94..038c56f6dd5 100644 --- a/test/CodeGen/X86/immediate_merging.ll +++ b/test/CodeGen/X86/immediate_merging.ll @@ -12,16 +12,16 @@ @i = common global i32 0, align 4 ; Test -Os to make sure immediates with multiple users don't get pulled in to -; instructions. +; instructions (8-bit immediates are exceptions). + define i32 @foo() optsize { ; X86-LABEL: foo: ; X86: # %bb.0: # %entry ; X86-NEXT: movl $1234, %eax # imm = 0x4D2 ; X86-NEXT: movl %eax, a ; X86-NEXT: movl %eax, b -; X86-NEXT: movl $12, %eax -; X86-NEXT: movl %eax, c -; X86-NEXT: cmpl %eax, e +; X86-NEXT: movl $12, c +; X86-NEXT: cmpl $12, e ; X86-NEXT: jne .LBB0_2 ; X86-NEXT: # %bb.1: # %if.then ; X86-NEXT: movl $1, x @@ -38,9 +38,8 @@ define i32 @foo() optsize { ; X64-NEXT: movl $1234, %eax # imm = 0x4D2 ; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: movl $12, %eax -; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: cmpl %eax, {{.*}}(%rip) +; X64-NEXT: movl $12, {{.*}}(%rip) +; X64-NEXT: cmpl $12, {{.*}}(%rip) ; X64-NEXT: jne .LBB0_2 ; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl $1, {{.*}}(%rip) @@ -74,16 +73,16 @@ if.end: ; preds = %if.then, %entry } ; Test PGSO to make sure immediates with multiple users don't get pulled in to -; instructions. +; instructions (8-bit immediates are exceptions). + define i32 @foo_pgso() !prof !14 { ; X86-LABEL: foo_pgso: ; X86: # %bb.0: # %entry ; X86-NEXT: movl $1234, %eax # imm = 0x4D2 ; X86-NEXT: movl %eax, a ; X86-NEXT: movl %eax, b -; X86-NEXT: movl $12, %eax -; X86-NEXT: movl %eax, c -; X86-NEXT: cmpl %eax, e +; X86-NEXT: movl $12, c +; X86-NEXT: cmpl $12, e ; X86-NEXT: jne .LBB1_2 ; X86-NEXT: # %bb.1: # %if.then ; X86-NEXT: movl $1, x @@ -100,9 +99,8 @@ define i32 @foo_pgso() !prof !14 { ; X64-NEXT: movl $1234, %eax # imm = 0x4D2 ; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: movl $12, %eax -; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: cmpl %eax, {{.*}}(%rip) +; X64-NEXT: movl $12, {{.*}}(%rip) +; X64-NEXT: cmpl $12, {{.*}}(%rip) ; X64-NEXT: jne .LBB1_2 ; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl $1, {{.*}}(%rip) diff --git a/test/CodeGen/X86/immediate_merging64.ll b/test/CodeGen/X86/immediate_merging64.ll index a807a119e89..d355bea1603 100644 --- a/test/CodeGen/X86/immediate_merging64.ll +++ b/test/CodeGen/X86/immediate_merging64.ll @@ -5,13 +5,13 @@ ; 32-bit immediates are merged for code size savings. ; Immediates with multiple users should not be pulled into instructions when -; optimizing for code size. +; optimizing for code size (but 8-bit immediates are exceptions). + define i1 @imm_multiple_users(i64 %a, i64* %b) optsize { ; CHECK-LABEL: imm_multiple_users: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: movq %rax, (%rsi) -; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: movq $-1, (%rsi) +; CHECK-NEXT: cmpq $-1, %rdi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq store i64 -1, i64* %b, align 8 @@ -22,9 +22,8 @@ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize { define i1 @imm_multiple_users_pgso(i64 %a, i64* %b) !prof !14 { ; CHECK-LABEL: imm_multiple_users_pgso: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: movq %rax, (%rsi) -; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: movq $-1, (%rsi) +; CHECK-NEXT: cmpq $-1, %rdi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq store i64 -1, i64* %b, align 8 diff --git a/test/CodeGen/X86/pr27202.ll b/test/CodeGen/X86/pr27202.ll index ea5781ed8c5..bb6be1d1685 100644 --- a/test/CodeGen/X86/pr27202.ll +++ b/test/CodeGen/X86/pr27202.ll @@ -14,12 +14,14 @@ define i1 @foo(i32 %i) optsize { ret i1 %cmp } +; 8-bit ALU immediates probably have small encodings. +; We do not want to hoist the constant into a register here. + define zeroext i1 @g(i32 %x) optsize { ; CHECK-LABEL: g: ; CHECK: # %bb.0: -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: orl $1, %edi +; CHECK-NEXT: cmpl $1, %edi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %t0 = or i32 %x, 1 @@ -27,7 +29,7 @@ define zeroext i1 @g(i32 %x) optsize { ret i1 %t1 } -; 8-bit immediates probably have small encodings. +; 8-bit ALU immediates probably have small encodings. ; We do not want to hoist the constant into a register here. define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { @@ -36,9 +38,8 @@ define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll $6, %eax ; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: movl $7, %eax -; CHECK-NEXT: andq %rax, %rsi -; CHECK-NEXT: andq %rax, %rdx +; CHECK-NEXT: andl $7, %esi +; CHECK-NEXT: andl $7, %edx ; CHECK-NEXT: leaq (%rdx,%rsi,8), %rax ; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: retq