From ee806a0db597df284e748a9a04df23cebd2a8c77 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Mon, 11 Jan 2010 17:03:47 +0000
Subject: [PATCH] Select an OR with immediate as an ADD if the input bits are
 known zero. This allow the instruction to be 3address-fied if needed.

llvm-svn: 93152
---
 lib/Target/X86/X86Instr64bit.td               | 16 ++++++--
 lib/Target/X86/X86InstrInfo.td                | 39 ++++++++++++++++---
 .../X86/2009-05-23-dagcombine-shifts.ll       |  8 +++-
 test/CodeGen/X86/3addr-or.ll                  | 11 ++++++
 4 files changed, 64 insertions(+), 10 deletions(-)
 create mode 100644 test/CodeGen/X86/3addr-or.ll

diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index b67cb630bbe..6bfa9a6949b 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1106,13 +1106,13 @@ def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
 def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst),
                    (ins GR64:$src1, i64i8imm:$src2),
                    "or{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),
-                    (implicit EFLAGS)]>;
+                   [(set GR64:$dst, (or_not_add GR64:$src1, i64immSExt8:$src2)),
+                    (implicit EFLAGS)]>;
 def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
                      (ins GR64:$src1, i64i32imm:$src2),
                      "or{q}\t{$src2, $dst|$dst, $src2}",
-                     [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),
-                      (implicit EFLAGS)]>;
+                     [(set GR64:$dst, (or_not_add GR64:$src1, i64immSExt32:$src2)),
+                      (implicit EFLAGS)]>;
 } // isTwoAddress
 
 def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
@@ -2114,6 +2114,14 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
                        GR64:$src2, (i8 imm:$amt2)), addr:$dst),
           (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
 
+// (or x, c) -> (add x, c) if masked bits are known zero.
+def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2),
+                    (implicit EFLAGS)),
+          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2),
+                    (implicit EFLAGS)),
+          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
 // X86 specific add which produces a flag.
 def : Pat<(addc GR64:$src1, GR64:$src2),
           (ADD64rr GR64:$src1, GR64:$src2)>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 3323e23ef8c..6835176485a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -493,6 +493,18 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
   return N->hasOneUse();
 }]>;
 
+// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+  return false;
+}]>;
+def or_not_add : PatFrag<(ops node:$lhs, node:$rhs),(or node:$lhs, node:$rhs),[{
+  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!CN) return true;
+  return !CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+}]>;
+
 // 'shld' and 'shrd' instruction patterns. Note that even though these have
 // the srl and shl in their patterns, the C++ code must still check for them,
 // because predicates are tested before children nodes are explored.
@@ -1880,28 +1892,28 @@ def OR32rm : I<0x0B, MRMSrcMem , (outs GR32:$dst),
 def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst),
                  (ins GR8 :$src1, i8imm:$src2),
                  "or{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (or GR8:$src1, imm:$src2)),
+                 [(set GR8:$dst, (or_not_add GR8:$src1, imm:$src2)),
                   (implicit EFLAGS)]>;
 def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst),
                   (ins GR16:$src1, i16imm:$src2),
                   "or{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (or GR16:$src1, imm:$src2)),
+                  [(set GR16:$dst, (or_not_add GR16:$src1, imm:$src2)),
                    (implicit EFLAGS)]>, OpSize;
 def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst),
                   (ins GR32:$src1, i32imm:$src2),
                   "or{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (or GR32:$src1, imm:$src2)),
+                  [(set GR32:$dst, (or_not_add GR32:$src1, imm:$src2)),
                    (implicit EFLAGS)]>;
 
 def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst),
                   (ins GR16:$src1, i16i8imm:$src2),
                   "or{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2)),
+                  [(set GR16:$dst, (or_not_add GR16:$src1, i16immSExt8:$src2)),
                    (implicit EFLAGS)]>, OpSize;
 def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst),
                   (ins GR32:$src1, i32i8imm:$src2),
                   "or{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (or GR32:$src1, i32immSExt8:$src2)),
+                  [(set GR32:$dst, (or_not_add GR32:$src1, i32immSExt8:$src2)),
                    (implicit EFLAGS)]>;
 let isTwoAddress = 0 in {
 def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4647,6 +4659,23 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
 def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
           (SETB_C32r)>;
 
+// (or x, c) -> (add x, c) if masked bits are known zero.
+def : Pat<(parallel (or_is_add GR8:$src1, imm:$src2),
+                    (implicit EFLAGS)),
+          (ADD8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2),
+                    (implicit EFLAGS)),
+          (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2),
+                    (implicit EFLAGS)),
+          (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2),
+                    (implicit EFLAGS)),
+          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2),
+                    (implicit EFLAGS)),
+          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
 //===----------------------------------------------------------------------===//
 // EFLAGS-defining Patterns
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 8a0b244a23f..3cd54169745 100644
--- a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -1,12 +1,18 @@
-; RUN: llc < %s | grep -E {sar|shl|mov|or} | count 4
+; RUN: llc < %s | FileCheck %s
+
 ; Check that the shr(shl X, 56), 48) is not mistakenly turned into
 ; a shr (X, -8) that gets subsequently "optimized away" as undef
 ; PR4254
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define i64 @foo(i64 %b) nounwind readnone {
 entry:
+; CHECK: foo:
+; CHECK: shlq $56, %rdi
+; CHECK: sarq $48, %rdi
+; CHECK: leaq 1(%rdi), %rax
   %shl = shl i64 %b, 56 ; [#uses=1]
   %shr = ashr i64 %shl, 48 ; [#uses=1]
   %add5 = or i64 %shr, 1 ; [#uses=1]
diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll
new file mode 100644
index 00000000000..395ba46aab3
--- /dev/null
+++ b/test/CodeGen/X86/3addr-or.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7527734
+
+define i32 @test(i32 %x) nounwind readnone ssp {
+entry:
+; CHECK: test:
+; CHECK: leal 3(%rdi), %eax
+  %0 = shl i32 %x, 5 ; [#uses=1]
+  %1 = or i32 %0, 3 ; [#uses=1]
+  ret i32 %1
+}