mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[X86] Clzero intrinsic and its addition under znver1
This patch does the following: 1. Adds an intrinsic, int_x86_clzero, which is exposed through __builtin_ia32_clzero. 2. Identifies the clzero feature using CPUID information (Function 8000_0008; checks whether EBX[0] = 1). 3. Adds the clzero feature to the znver1 architecture. 4. Adds a custom inserter for the instruction in lowering. 5. Adds a test case to check the intrinsic. 6. Adds the clzero instruction to the assembler tests. Patch by Ganesh Gopalasubramanian, with a couple of formatting tweaks, a disassembler test, and the use of update_llc_test_checks.py from me. Differential revision: https://reviews.llvm.org/D29385 llvm-svn: 294558
This commit is contained in:
parent
9aa659cc4d
commit
c2247a32db
@ -6495,3 +6495,10 @@ let TargetPrefix = "x86" in {
|
||||
: GCCBuiltin<"__builtin_ia32_mwaitx">,
|
||||
Intrinsic<[], [ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], []>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Cache-line zero
|
||||
let TargetPrefix = "x86" in {
|
||||
def int_x86_clzero : GCCBuiltin<"__builtin_ia32_clzero">,
|
||||
Intrinsic<[], [llvm_ptr_ty], []>;
|
||||
}
|
||||
|
@ -1353,6 +1353,10 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
|
||||
Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
|
||||
Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
|
||||
|
||||
bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
|
||||
!getX86CpuIDAndInfoEx(0x80000008,0x0, &EAX, &EBX, &ECX, &EDX);
|
||||
Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
|
||||
|
||||
bool HasLeaf7 =
|
||||
MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
|
||||
|
||||
|
@ -202,6 +202,8 @@ def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
|
||||
"Support LAHF and SAHF instructions">;
|
||||
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
|
||||
"Enable MONITORX/MWAITX timer functionality">;
|
||||
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
|
||||
"Enable Cache Line Zero">;
|
||||
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
|
||||
"Support MPX instructions">;
|
||||
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
|
||||
@ -765,6 +767,7 @@ def: ProcessorModel<"znver1", BtVer2Model, [
|
||||
FeatureBMI,
|
||||
FeatureBMI2,
|
||||
FeatureCLFLUSHOPT,
|
||||
FeatureCLZERO,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureF16C,
|
||||
FeatureFMA,
|
||||
|
@ -24418,6 +24418,26 @@ static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
|
||||
return BB;
|
||||
}
|
||||
|
||||
/// Expand the CLZERO pseudo instruction.
///
/// An LEA is inserted before the pseudo to place the address described by the
/// pseudo's memory operand into RAX (64-bit subtarget) or EAX (otherwise).
/// CLZEROr, which carries no explicit operands, is inserted after the LEA, and
/// the pseudo itself is then erased.
///
/// \param MI        The CLZERO pseudo; its first X86::AddrNumOperands operands
///                  are copied onto the LEA.
/// \param BB        The block that contains \p MI and receives the new
///                  instructions.
/// \param Subtarget Supplies the instruction info and the 64-bit mode query.
/// \returns \p BB.
static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB,
                                     const X86Subtarget &Subtarget) {
  const TargetInstrInfo *InstrInfo = Subtarget.getInstrInfo();
  DebugLoc Loc = MI->getDebugLoc();

  // Choose the LEA opcode and the address register that match the mode.
  unsigned LeaOpcode;
  unsigned AddrReg;
  if (Subtarget.is64Bit()) {
    LeaOpcode = X86::LEA64r;
    AddrReg = X86::RAX;
  } else {
    LeaOpcode = X86::LEA32r;
    AddrReg = X86::EAX;
  }

  // Compute the address into RAX/EAX by forwarding the pseudo's address
  // operands to the LEA.
  MachineInstrBuilder Lea =
      BuildMI(*BB, MI, Loc, InstrInfo->get(LeaOpcode), AddrReg);
  for (int OpIdx = 0; OpIdx != X86::AddrNumOperands; ++OpIdx)
    Lea.add(MI->getOperand(OpIdx));

  // The real instruction is built without any operands.
  BuildMI(*BB, MI, Loc, InstrInfo->get(X86::CLZEROr));

  // The pseudo has been fully replaced, so remove it.
  MI->eraseFromParent();
  return BB;
}
|
||||
|
||||
|
||||
|
||||
MachineBasicBlock *
|
||||
X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
|
||||
MachineBasicBlock *MBB) const {
|
||||
@ -26038,6 +26058,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
||||
return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
|
||||
case X86::MONITORX:
|
||||
return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
|
||||
|
||||
// Cache line zero
|
||||
case X86::CLZERO:
|
||||
return emitClzero(&MI, BB, Subtarget);
|
||||
|
||||
// PKU feature
|
||||
case X86::WRPKRU:
|
||||
return emitWRPKRU(MI, BB, Subtarget);
|
||||
|
@ -859,6 +859,7 @@ def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
|
||||
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
|
||||
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
|
||||
def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
|
||||
def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
|
||||
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
|
||||
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
|
||||
def HasMPX : Predicate<"Subtarget->hasMPX()">;
|
||||
@ -2456,8 +2457,19 @@ def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CLZERO Instruction
|
||||
//
|
||||
let Uses = [EAX] in
|
||||
def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB;
|
||||
let SchedRW = [WriteSystem] in {
|
||||
let Uses = [EAX] in
|
||||
def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", [], IIC_SSE_CLZERO>,
|
||||
TB, Requires<[HasCLZERO]>;
|
||||
|
||||
let usesCustomInserter = 1 in {
|
||||
def CLZERO : PseudoI<(outs), (ins i32mem:$src1),
|
||||
[(int_x86_clzero addr:$src1)]>, Requires<[HasCLZERO]>;
|
||||
}
|
||||
} // SchedRW
|
||||
|
||||
def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>, Requires<[Not64BitMode]>;
|
||||
def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>, Requires<[In64BitMode]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Pattern fragments to auto generate TBM instructions.
|
||||
|
@ -366,6 +366,7 @@ def IIC_SSE_MWAIT : InstrItinClass;
|
||||
def IIC_SSE_MONITOR : InstrItinClass;
|
||||
def IIC_SSE_MWAITX : InstrItinClass;
|
||||
def IIC_SSE_MONITORX : InstrItinClass;
|
||||
def IIC_SSE_CLZERO : InstrItinClass;
|
||||
|
||||
def IIC_SSE_PREFETCH : InstrItinClass;
|
||||
def IIC_SSE_PAUSE : InstrItinClass;
|
||||
|
@ -289,6 +289,7 @@ void X86Subtarget::initializeEnvironment() {
|
||||
HasRDSEED = false;
|
||||
HasLAHFSAHF = false;
|
||||
HasMWAITX = false;
|
||||
HasCLZERO = false;
|
||||
HasMPX = false;
|
||||
IsBTMemSlow = false;
|
||||
IsPMULLDSlow = false;
|
||||
|
@ -175,6 +175,9 @@ protected:
|
||||
/// Processor has MONITORX/MWAITX instructions.
|
||||
bool HasMWAITX;
|
||||
|
||||
/// Processor has Cache Line Zero instruction
|
||||
bool HasCLZERO;
|
||||
|
||||
/// Processor has Prefetch with intent to Write instruction
|
||||
bool HasPFPREFETCHWT1;
|
||||
|
||||
@ -460,6 +463,7 @@ public:
|
||||
bool hasRDSEED() const { return HasRDSEED; }
|
||||
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
|
||||
bool hasMWAITX() const { return HasMWAITX; }
|
||||
bool hasCLZERO() const { return HasCLZERO; }
|
||||
bool isBTMemSlow() const { return IsBTMemSlow; }
|
||||
bool isSHLDSlow() const { return IsSHLDSlow; }
|
||||
bool isPMULLDSlow() const { return IsPMULLDSlow; }
|
||||
|
23
test/CodeGen/X86/clzero.ll
Normal file
23
test/CodeGen/X86/clzero.ll
Normal file
@ -0,0 +1,23 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+clzero | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i386-pc-linux -mattr=+clzero | FileCheck %s --check-prefix=X32
|
||||
|
||||
define void @foo(i8* %p) #0 {
|
||||
; X64-LABEL: foo:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: leaq (%rdi), %rax
|
||||
; X64-NEXT: clzero
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32-LABEL: foo:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: leal (%eax), %eax
|
||||
; X32-NEXT: clzero
|
||||
; X32-NEXT: retl
|
||||
entry:
|
||||
tail call void @llvm.x86.clzero(i8* %p) #1
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.x86.clzero(i8*) #1
|
@ -129,6 +129,9 @@
|
||||
# CHECK: invlpga
|
||||
0x0f 0x01 0xdf
|
||||
|
||||
# CHECK: clzero
|
||||
0x0f,0x01,0xfc
|
||||
|
||||
# CHECK: movl $0, -4(%ebp)
|
||||
0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
|
||||
|
||||
|
@ -444,6 +444,14 @@ cmovnae %bx,%bx
|
||||
// CHECK: encoding: [0x0f,0x21,0xf8]
|
||||
movl %dr7,%eax
|
||||
|
||||
// CHECK: clzero
|
||||
// CHECK: encoding: [0x0f,0x01,0xfc]
|
||||
clzero
|
||||
|
||||
// CHECK: clzero
|
||||
// CHECK: encoding: [0x0f,0x01,0xfc]
|
||||
clzero %eax
|
||||
|
||||
// radr://8017522
|
||||
// CHECK: wait
|
||||
// CHECK: encoding: [0x9b]
|
||||
|
@ -1502,6 +1502,14 @@ vmovq %xmm0, %rax
|
||||
// CHECK: encoding: [0x0f,0x01,0xfb]
|
||||
mwaitx %rax, %rcx, %rbx
|
||||
|
||||
// CHECK: clzero
|
||||
// CHECK: encoding: [0x0f,0x01,0xfc]
|
||||
clzero
|
||||
|
||||
// CHECK: clzero
|
||||
// CHECK: encoding: [0x0f,0x01,0xfc]
|
||||
clzero %rax
|
||||
|
||||
// CHECK: movl %r15d, (%r15,%r15)
|
||||
// CHECK: encoding: [0x47,0x89,0x3c,0x3f]
|
||||
movl %r15d, (%r15,%r15)
|
||||
|
Loading…
Reference in New Issue
Block a user