1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[X86] Don't use GR64 register 'and with immediate' instructions if the immediate is zero in the upper 33-bits or upper 57-bits. Use GR32 instructions instead.

Previously the patterns didn't have high enough priority and we would only use the GR32 form if the only the upper 32 or 56 bits were zero.

Fixes PR23100.

llvm-svn: 234075
This commit is contained in:
Craig Topper 2015-04-04 02:08:20 +00:00
parent f0e072b4f9
commit 54ac1d95cd
10 changed files with 26 additions and 9 deletions

View File

@ -1233,6 +1233,7 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
// least 32 bits of leading zeros. If in addition the last 32 bits can be
// represented with a sign extension of a 8 bit constant, use that.
let AddedComplexity = 1 in // Give priority over i64immSExt8.
def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
(SUBREG_TO_REG
(i64 0),
@ -1241,6 +1242,7 @@ def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
(i32 (GetLo8XForm imm:$imm))),
sub_32bit)>;
let AddedComplexity = 1 in // Give priority over i64immSExt32.
def : Pat<(and GR64:$src, i64immZExt32:$imm),
(SUBREG_TO_REG
(i64 0),
@ -1267,16 +1269,19 @@ def : Pat<(and GR16:$src1, 0xff),
Requires<[Not64BitMode]>;
// r & (2^32-1) ==> movz
let AddedComplexity = 1 in // Give priority over i64immZExt32.
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
(SUBREG_TO_REG (i64 0),
(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
sub_32bit)>;
// r & (2^16-1) ==> movz
let AddedComplexity = 1 in // Give priority over i64immZExt32.
def : Pat<(and GR64:$src, 0xffff),
(SUBREG_TO_REG (i64 0),
(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
sub_32bit)>;
// r & (2^8-1) ==> movz
let AddedComplexity = 1 in // Give priority over i64immSExt32.
def : Pat<(and GR64:$src, 0xff),
(SUBREG_TO_REG (i64 0),
(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "7 machine-licm"
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "9 machine-licm"
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
; rdar://6627786
; rdar://7792037

View File

@ -21,6 +21,6 @@ entry:
%tmp1 = and i64 %x, 123127
%tmp2 = or i64 %tmp1, 3
ret i64 %tmp2
; DARWIN-OPT: andq $123124
; DARWIN-OPT: andl $123124
; DARWIN-OPT-NEXT: leaq 3
}

View File

@ -17,3 +17,15 @@ define void @foo(i64 %zed, i64* %x) nounwind {
store i64 %t2, i64* %x, align 8
ret void
}
define i64 @bar(i64 %zed) nounwind {
; CHECK: andl $42, %edi # encoding: [0x83,0xe7,0x2a]
%t1 = and i64 %zed, 42
ret i64 %t1
}
define i64 @baz(i64 %zed) nounwind {
; CHECK: andl $2147483647, %edi # encoding: [0x81,0xe7,0xff,0xff,0xff,0x7f]
%t1 = and i64 %zed, 2147483647
ret i64 %t1
}

View File

@ -48,7 +48,7 @@ define void @atomic_fetch_and64() nounwind {
; X64: lock
; X64: andq $3
%t2 = atomicrmw and i64* @sc64, i64 5 acquire
; X64: andq
; X64: andl
; X64: lock
; X64: cmpxchgq
%t3 = atomicrmw and i64* @sc64, i64 %t2 acquire

View File

@ -260,7 +260,7 @@ entry:
%and = and i64 %x, 2147483647
ret i64 %and
; CHECK-LABEL: bzhi64_small_constant_mask:
; CHECK: andq $2147483647, %r[[ARG1]]
; CHECK: andl $2147483647, %e[[ARG1]]
}
define i32 @blsi32(i32 %x) nounwind readnone {

View File

@ -29,7 +29,7 @@ define i64 @t3(i64 %x) nounwind readnone ssp {
entry:
; CHECK-LABEL: t3:
; CHECK: sbbq %rax, %rax
; CHECK: andq $64, %rax
; CHECK: andl $64, %eax
%0 = icmp ult i64 %x, 18 ; <i1> [#uses=1]
%iftmp.2.0 = select i1 %0, i64 64, i64 0 ; <i64> [#uses=1]
ret i64 %iftmp.2.0

View File

@ -3,7 +3,7 @@
define i64 @test(i64 %A) {
; CHECK: @test
; CHECK: shrq $54
; CHECK: andq $1020
; CHECK: andl $1020
; CHECK: ret
%B = lshr i64 %A, 56
%C = shl i64 %B, 2

View File

@ -6,13 +6,13 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
; CHECK: testq %rdi, %rdi
; CHECK: andl
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: js LBB0_1
; CHECK: cvtsi2ss
; CHECK-NEXT: ret
; CHECK: LBB0_1
; CHECK: shrq
; CHECK-NEXT: andq
; CHECK-NEXT: orq
; CHECK-NEXT: cvtsi2ss
define float @test(i64 %a) {

View File

@ -3,7 +3,7 @@
define i64 @z() nounwind {
; CHECK: movq $tm_nest_level@TPOFF, %r[[R0:[abcd]]]x
; CHECK-NEXT: addl %fs:0, %e[[R0]]x
; CHECK-NEXT: andq $100, %r[[R0]]x
; CHECK-NEXT: andl $100, %e[[R0]]x
ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100)
}