mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] ABI change for x86-32: pass 3 vector arguments in-register instead of 4, except on Darwin.
This changes the ABI used on 32-bit x86 for passing vector arguments. Historically, clang passes the first 4 vector arguments in-register, and additional vector arguments on the stack, regardless of platform. That is different from the behavior of gcc, icc, and msvc, all of which pass only the first 3 arguments in-register. The 3-register convention is documented, unofficially, in Agner's calling convention guide, and, officially, in the recently released version 1.0 of the i386 psABI.

Darwin is kept as is because the OS X ABI Function Call Guide explicitly documents the current (4-register) behavior.

This fixes PR21510.

Differential revision: http://reviews.llvm.org/D9644
llvm-svn: 237682
This commit is contained in:
parent
3eef2468e3
commit
c9165a5a41
@ -445,9 +445,61 @@ def CC_X86_64_AnyReg : CallingConv<[
|
|||||||
// X86 C Calling Convention
|
// X86 C Calling Convention
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
/// CC_X86_32_Vector_Common - In all X86-32 calling conventions, extra vector
|
||||||
|
/// values are spilled on the stack.
|
||||||
|
def CC_X86_32_Vector_Common : CallingConv<[
|
||||||
|
// Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
|
||||||
|
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
|
||||||
|
|
||||||
|
// 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
|
||||||
|
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
||||||
|
CCAssignToStack<32, 32>>,
|
||||||
|
|
||||||
|
// 512-bit AVX-512 vectors get 64-byte stack slots that are 64-byte aligned.
|
||||||
|
CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
|
||||||
|
CCAssignToStack<64, 64>>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
// CC_X86_32_Vector_Standard - The first 3 vector arguments are passed in
|
||||||
|
// vector registers.
|
||||||
|
def CC_X86_32_Vector_Standard : CallingConv<[
|
||||||
|
// SSE vector arguments are passed in XMM registers.
|
||||||
|
CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||||
|
CCAssignToReg<[XMM0, XMM1, XMM2]>>>,
|
||||||
|
|
||||||
|
// AVX 256-bit vector arguments are passed in YMM registers.
|
||||||
|
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
||||||
|
CCIfSubtarget<"hasFp256()",
|
||||||
|
CCAssignToReg<[YMM0, YMM1, YMM2]>>>>,
|
||||||
|
|
||||||
|
// AVX 512-bit vector arguments are passed in ZMM registers.
|
||||||
|
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
|
||||||
|
CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>>,
|
||||||
|
|
||||||
|
CCDelegateTo<CC_X86_32_Vector_Common>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
// CC_X86_32_Vector_Darwin - The first 4 vector arguments are passed in
|
||||||
|
// vector registers.
|
||||||
|
def CC_X86_32_Vector_Darwin : CallingConv<[
|
||||||
|
// SSE vector arguments are passed in XMM registers.
|
||||||
|
CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||||
|
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
|
||||||
|
|
||||||
|
// AVX 256-bit vector arguments are passed in YMM registers.
|
||||||
|
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
||||||
|
CCIfSubtarget<"hasFp256()",
|
||||||
|
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
|
||||||
|
|
||||||
|
// AVX 512-bit vector arguments are passed in ZMM registers.
|
||||||
|
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
|
||||||
|
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>>,
|
||||||
|
|
||||||
|
CCDelegateTo<CC_X86_32_Vector_Common>
|
||||||
|
]>;
|
||||||
|
|
||||||
/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
|
/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
|
||||||
/// values are spilled on the stack, and the first 4 vector values go in XMM
|
/// values are spilled on the stack.
|
||||||
/// regs.
|
|
||||||
def CC_X86_32_Common : CallingConv<[
|
def CC_X86_32_Common : CallingConv<[
|
||||||
// Handles byval parameters.
|
// Handles byval parameters.
|
||||||
CCIfByVal<CCPassByVal<4, 4>>,
|
CCIfByVal<CCPassByVal<4, 4>>,
|
||||||
@ -483,33 +535,16 @@ def CC_X86_32_Common : CallingConv<[
|
|||||||
CCIfType<[v32i1], CCPromoteToType<v32i8>>,
|
CCIfType<[v32i1], CCPromoteToType<v32i8>>,
|
||||||
CCIfType<[v64i1], CCPromoteToType<v64i8>>,
|
CCIfType<[v64i1], CCPromoteToType<v64i8>>,
|
||||||
|
|
||||||
// The first 4 SSE vector arguments are passed in XMM registers.
|
|
||||||
CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
|
||||||
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
|
|
||||||
|
|
||||||
// The first 4 AVX 256-bit vector arguments are passed in YMM registers.
|
|
||||||
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
|
||||||
CCIfSubtarget<"hasFp256()",
|
|
||||||
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
|
|
||||||
|
|
||||||
// The first 4 AVX 512-bit vector arguments are passed in ZMM registers.
|
|
||||||
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
|
|
||||||
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>>,
|
|
||||||
|
|
||||||
// Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
|
|
||||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
|
|
||||||
|
|
||||||
// 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
|
|
||||||
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
|
||||||
CCAssignToStack<32, 32>>,
|
|
||||||
|
|
||||||
// 512-bit AVX 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
|
|
||||||
CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
|
|
||||||
CCAssignToStack<64, 64>>,
|
|
||||||
|
|
||||||
// __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
|
// __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
|
||||||
// passed in the parameter area.
|
// passed in the parameter area.
|
||||||
CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>;
|
CCIfType<[x86mmx], CCAssignToStack<8, 4>>,
|
||||||
|
|
||||||
|
// Darwin passes vectors in a form that differs from the i386 psABI
|
||||||
|
CCIfSubtarget<"isTargetDarwin()", CCDelegateTo<CC_X86_32_Vector_Darwin>>,
|
||||||
|
|
||||||
|
// Otherwise, drop to 'normal' X86-32 CC
|
||||||
|
CCDelegateTo<CC_X86_32_Vector_Standard>
|
||||||
|
]>;
|
||||||
|
|
||||||
def CC_X86_32_C : CallingConv<[
|
def CC_X86_32_C : CallingConv<[
|
||||||
// Promote i1/i8/i16 arguments to i32.
|
// Promote i1/i8/i16 arguments to i32.
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
|
; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
|
||||||
; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
|
; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
|
||||||
|
|
||||||
|
target triple = "i686-pc-linux-gnu"
|
||||||
|
|
||||||
define <1 x float> @test1(<1 x double> %x) nounwind {
|
define <1 x float> @test1(<1 x double> %x) nounwind {
|
||||||
; CHECK-LABEL: test1:
|
; CHECK-LABEL: test1:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
@ -59,12 +61,14 @@ define <4 x float> @test3(<4 x double> %x) nounwind {
|
|||||||
define <8 x float> @test4(<8 x double> %x) nounwind {
|
define <8 x float> @test4(<8 x double> %x) nounwind {
|
||||||
; CHECK-LABEL: test4:
|
; CHECK-LABEL: test4:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
|
; CHECK-NEXT: subl $12, %esp
|
||||||
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm1
|
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm1
|
||||||
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
|
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
|
||||||
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||||
; CHECK-NEXT: cvtpd2ps %xmm3, %xmm3
|
|
||||||
; CHECK-NEXT: cvtpd2ps %xmm2, %xmm1
|
; CHECK-NEXT: cvtpd2ps %xmm2, %xmm1
|
||||||
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
; CHECK-NEXT: cvtpd2ps 16(%esp), %xmm2
|
||||||
|
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||||
|
; CHECK-NEXT: addl $12, %esp
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
;
|
;
|
||||||
; AVX-LABEL: test4:
|
; AVX-LABEL: test4:
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm3, %xmm1"
|
; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm3, %xmm1"
|
||||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm2, %xmm0"
|
; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm2, %xmm0"
|
||||||
|
|
||||||
|
target triple = "i686-apple-darwin8"
|
||||||
|
|
||||||
define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
|
define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
|
||||||
%y = fmul <4 x double> %x, %z
|
%y = fmul <4 x double> %x, %z
|
||||||
ret <4 x double> %y
|
ret <4 x double> %y
|
||||||
|
44
test/CodeGen/X86/x86-32-vector-calling-conv.ll
Normal file
44
test/CodeGen/X86/x86-32-vector-calling-conv.ll
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=DARWIN
|
||||||
|
; RUN: llc < %s -mtriple=i686-pc-linux -mattr=+avx512f | FileCheck %s --check-prefix=LINUX
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_sse:
|
||||||
|
; DARWIN-DAG: vpaddd %xmm1, %xmm0, %xmm0
|
||||||
|
; DARWIN-DAG: vpaddd %xmm3, %xmm2, %xmm1
|
||||||
|
; DARWIN: vpaddd %xmm1, %xmm0, %xmm0
|
||||||
|
; LINUX-DAG: vpaddd %xmm1, %xmm0, %xmm0
|
||||||
|
; LINUX-DAG: vpaddd {{[0-9]+}}(%e{{s|b}}p), %xmm2, %xmm1
|
||||||
|
; LINUX: vpaddd %xmm1, %xmm0, %xmm0
|
||||||
|
define <4 x i32> @test_sse(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) nounwind {
|
||||||
|
%r0 = add <4 x i32> %a, %b
|
||||||
|
%r1 = add <4 x i32> %c, %d
|
||||||
|
%ret = add <4 x i32> %r0, %r1
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_avx:
|
||||||
|
; DARWIN-DAG: vpaddd %ymm1, %ymm0, %ymm0
|
||||||
|
; DARWIN-DAG: vpaddd %ymm3, %ymm2, %ymm1
|
||||||
|
; DARWIN: vpaddd %ymm1, %ymm0, %ymm0
|
||||||
|
; LINUX-DAG: vpaddd %ymm1, %ymm0, %ymm0
|
||||||
|
; LINUX-DAG: vpaddd {{[0-9]+}}(%e{{s|b}}p), %ymm2, %ymm1
|
||||||
|
; LINUX: vpaddd %ymm1, %ymm0, %ymm0
|
||||||
|
define <8 x i32> @test_avx(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) nounwind {
|
||||||
|
%r0 = add <8 x i32> %a, %b
|
||||||
|
%r1 = add <8 x i32> %c, %d
|
||||||
|
%ret = add <8 x i32> %r0, %r1
|
||||||
|
ret <8 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_avx512:
|
||||||
|
; DARWIN-DAG: vpaddd %zmm1, %zmm0, %zmm0
|
||||||
|
; DARWIN-DAG: vpaddd %zmm3, %zmm2, %zmm1
|
||||||
|
; DARWIN: vpaddd %zmm1, %zmm0, %zmm0
|
||||||
|
; LINUX-DAG: vpaddd %zmm1, %zmm0, %zmm0
|
||||||
|
; LINUX-DAG: vpaddd {{[0-9]+}}(%e{{s|b}}p), %zmm2, %zmm1
|
||||||
|
; LINUX: vpaddd %zmm1, %zmm0, %zmm0
|
||||||
|
define <16 x i32> @test_avx512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) nounwind {
|
||||||
|
%r0 = add <16 x i32> %a, %b
|
||||||
|
%r1 = add <16 x i32> %c, %d
|
||||||
|
%ret = add <16 x i32> %r0, %r1
|
||||||
|
ret <16 x i32> %ret
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user