From 4a7ff81d5f4e58cc7861513e32369556a033af0f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 7 Oct 2013 05:42:48 +0000
Subject: [PATCH] Teach the X86 asm parser that VMOVAPSrr and other VEX-encoded
 register-to-register moves should be switched from the MRMSrcReg form to the
 MRMDestReg form if the source register is a 64-bit extended register and the
 destination register is not. This allows the instruction to be encoded using
 the 2-byte VEX form instead of the 3-byte VEX form. The GNU assembler has
 similar behavior, and instruction selection already does this.

llvm-svn: 192088
---
 lib/Target/X86/AsmParser/X86AsmParser.cpp | 49 ++++++++++++++++++++
 lib/Target/X86/X86InstrSSE.td             |  2 +-
 test/MC/X86/x86_64-avx-encoding.s         | 56 +++++++++++++++++++++++
 3 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 93c2169a408..dc9654fa580 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2271,6 +2271,55 @@ processInstruction(MCInst &Inst,
   case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
   case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
   case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
+  case X86::VMOVAPDrr:
+  case X86::VMOVAPDYrr:
+  case X86::VMOVAPSrr:
+  case X86::VMOVAPSYrr:
+  case X86::VMOVDQArr:
+  case X86::VMOVDQAYrr:
+  case X86::VMOVDQUrr:
+  case X86::VMOVDQUYrr:
+  case X86::VMOVUPDrr:
+  case X86::VMOVUPDYrr:
+  case X86::VMOVUPSrr:
+  case X86::VMOVUPSYrr: {
+    if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
+        !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
+      return false;
+
+    unsigned NewOpc;
+    switch (Inst.getOpcode()) {
+    default: llvm_unreachable("Invalid opcode");
+    case X86::VMOVAPDrr:  NewOpc = X86::VMOVAPDrr_REV;  break;
+    case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+    case X86::VMOVAPSrr:  NewOpc = X86::VMOVAPSrr_REV;  break;
+    case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+    case X86::VMOVDQArr:  NewOpc = X86::VMOVDQArr_REV;  break;
+    case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+    case X86::VMOVDQUrr:  NewOpc = X86::VMOVDQUrr_REV;  break;
+    case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+    case X86::VMOVUPDrr:  NewOpc = X86::VMOVUPDrr_REV;  break;
+    case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+    case X86::VMOVUPSrr:  NewOpc = X86::VMOVUPSrr_REV;  break;
+    case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+    }
+    Inst.setOpcode(NewOpc);
+    return true;
+  }
+  case X86::VMOVSDrr:
+  case X86::VMOVSSrr: {
+    if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
+        !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
+      return false;
+    unsigned NewOpc;
+    switch (Inst.getOpcode()) {
+    default: llvm_unreachable("Invalid opcode");
+    case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+    case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+    }
+    Inst.setOpcode(NewOpc);
+    return true;
+  }
   }
 }

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 3d8654900c4..f6f2266244b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1104,7 +1104,7 @@ let Predicates = [UseSSE1] in {
 
 // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
 // bits are disregarded. FIXME: Set encoding to pseudo!
-let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
+let isCodeGenOnly = 1, neverHasSideEffects = 1, SchedRW = [WriteMove] in {
 def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                        "movaps\t{$src, $dst|$dst, $src}", [],
                        IIC_SSE_MOVA_P_RR>, VEX;

diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
index 6da9e21fef6..1a4e7844511 100644
--- a/test/MC/X86/x86_64-avx-encoding.s
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -4185,3 +4185,59 @@ _foo2:
 // CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
 // CHECK: encoding: [0xc4,0x02,0x3d,0x91,0x14,0x4f]
           vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
+
+// CHECK: vmovaps %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x78,0x28,0xc0]
+          vmovaps %xmm0, %xmm8
+
+// CHECK: vmovaps %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0x78,0x29,0xc0]
+          vmovaps %xmm8, %xmm0
+
+// CHECK: vmovaps %ymm0, %ymm8
+// CHECK: encoding: [0xc5,0x7c,0x28,0xc0]
+          vmovaps %ymm0, %ymm8
+
+// CHECK: vmovaps %ymm8, %ymm0
+// CHECK: encoding: [0xc5,0x7c,0x29,0xc0]
+          vmovaps %ymm8, %ymm0
+
+// CHECK: vmovups %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x78,0x10,0xc0]
+          vmovups %xmm0, %xmm8
+
+// CHECK: vmovups %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0x78,0x11,0xc0]
+          vmovups %xmm8, %xmm0
+
+// CHECK: vmovups %ymm0, %ymm8
+// CHECK: encoding: [0xc5,0x7c,0x10,0xc0]
+          vmovups %ymm0, %ymm8
+
+// CHECK: vmovups %ymm8, %ymm0
+// CHECK: encoding: [0xc5,0x7c,0x11,0xc0]
+          vmovups %ymm8, %ymm0
+
+// CHECK: vmovss %xmm0, %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x7a,0x10,0xc0]
+          vmovss %xmm0, %xmm0, %xmm8
+
+// CHECK: vmovss %xmm0, %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0xba,0x10,0xc0]
+          vmovss %xmm0, %xmm8, %xmm0
+
+// CHECK: vmovss %xmm8, %xmm0, %xmm0
+// CHECK: encoding: [0xc5,0x7a,0x11,0xc0]
+          vmovss %xmm8, %xmm0, %xmm0
+
+// CHECK: vmovsd %xmm0, %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x7b,0x10,0xc0]
+          vmovsd %xmm0, %xmm0, %xmm8
+
+// CHECK: vmovsd %xmm0, %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0xbb,0x10,0xc0]
+          vmovsd %xmm0, %xmm8, %xmm0
+
+// CHECK: vmovsd %xmm8, %xmm0, %xmm0
+// CHECK: encoding: [0xc5,0x7b,0x11,0xc0]
+          vmovsd %xmm8, %xmm0, %xmm0
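
A minimal standalone sketch (an editorial aside, not part of the patch) of why
the operand-form swap saves a byte. The 2-byte VEX prefix (C5) carries only
the inverted VEX.R bit, which extends the ModRM.reg field; VEX.B, which
extends ModRM.rm, exists only in the 3-byte prefix (C4). The MRMSrcReg form
(opcode 0x28) encodes the source in ModRM.rm, so an extended source forces the
3-byte prefix, while the MRMDestReg form (opcode 0x29) encodes the source in
ModRM.reg, where VEX.R covers it. The helper names below (isExtendedReg,
shouldUseRevForm) are hypothetical, invented for this sketch and not LLVM
APIs; the patch itself uses X86II::isX86_64ExtendedReg.

#include <cstdio>

// Hypothetical stand-in for X86II::isX86_64ExtendedReg: true for the high
// registers xmm8-xmm15/ymm8-ymm15, which need an extra register-number bit.
static bool isExtendedReg(unsigned RegNo) { return RegNo >= 8; }

// Mirrors the parser's condition above: flip to the _REV (MRMDestReg) form
// exactly when the source needs the extension bit but the destination does
// not, moving the extended register out of ModRM.rm (VEX.B, 3-byte prefix
// only) and into ModRM.reg (VEX.R, available in the 2-byte prefix).
static bool shouldUseRevForm(unsigned DestRegNo, unsigned SrcRegNo) {
  return !isExtendedReg(DestRegNo) && isExtendedReg(SrcRegNo);
}

int main() {
  // vmovaps %xmm8, %xmm0: flip to the 0x29 form; the test above expects the
  // 2-byte encoding c5 78 29 c0.
  std::printf("xmm8 -> xmm0: use _REV = %d\n", shouldUseRevForm(0, 8));
  // vmovaps %xmm0, %xmm8: VEX.R already covers xmm8 in ModRM.reg, so the
  // 0x28 form stays 2-byte (c5 78 28 c0) and no flip is needed.
  std::printf("xmm0 -> xmm8: use _REV = %d\n", shouldUseRevForm(8, 0));
  return 0;
}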