mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
For whatever reason, x86 CallingConv::Fast (i.e. fastcc) was not passing scalar arguments in registers. This patch defines a new fastcc calling convention (CC) that is slightly different from the FastCall CC. In addition to passing the first two integer arguments in ECX and EDX, it also specifies that doubles are passed in 8-byte slots that are 8-byte aligned (instead of 4-byte aligned). This avoids a potential performance hazard where doubles span cache-line boundaries.
llvm-svn: 55807
This commit is contained in:
parent
3d42ac68df
commit
bd15e330d0
@ -341,3 +341,20 @@ def CC_X86_32_FastCall : CallingConv<[
|
|||||||
// Otherwise, same as everything else.
|
// Otherwise, same as everything else.
|
||||||
CCDelegateTo<CC_X86_32_Common>
|
CCDelegateTo<CC_X86_32_Common>
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
|
def CC_X86_32_FastCC : CallingConv<[
|
||||||
|
// Promote i8/i16 arguments to i32.
|
||||||
|
CCIfType<[i8, i16], CCPromoteToType<i32>>,
|
||||||
|
|
||||||
|
// The 'nest' parameter, if any, is passed in EAX.
|
||||||
|
CCIfNest<CCAssignToReg<[EAX]>>,
|
||||||
|
|
||||||
|
// The first 2 integer arguments are passed in ECX/EDX
|
||||||
|
CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
|
||||||
|
|
||||||
|
// Doubles get 8-byte slots that are 8-byte aligned.
|
||||||
|
CCIfType<[f64], CCAssignToStack<8, 8>>,
|
||||||
|
|
||||||
|
// Otherwise, same as everything else.
|
||||||
|
CCDelegateTo<CC_X86_32_Common>
|
||||||
|
]>;
|
||||||
|
@ -1100,6 +1100,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDValue Op) const {
|
|||||||
return CC_X86_32_FastCall;
|
return CC_X86_32_FastCall;
|
||||||
else if (CC == CallingConv::Fast && PerformTailCallOpt)
|
else if (CC == CallingConv::Fast && PerformTailCallOpt)
|
||||||
return CC_X86_32_TailCall;
|
return CC_X86_32_TailCall;
|
||||||
|
else if (CC == CallingConv::Fast)
|
||||||
|
return CC_X86_32_FastCC;
|
||||||
else
|
else
|
||||||
return CC_X86_32_C;
|
return CC_X86_32_C;
|
||||||
}
|
}
|
||||||
|
@ -6,9 +6,9 @@
|
|||||||
@rtx_format = external global [116 x i8*] ; <[116 x i8*]*> [#uses=1]
|
@rtx_format = external global [116 x i8*] ; <[116 x i8*]*> [#uses=1]
|
||||||
@rtx_length = external global [117 x i32] ; <[117 x i32]*> [#uses=1]
|
@rtx_length = external global [117 x i32] ; <[117 x i32]*> [#uses=1]
|
||||||
|
|
||||||
declare fastcc %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32)
|
declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32)
|
||||||
|
|
||||||
define fastcc %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) {
|
define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) {
|
||||||
entry:
|
entry:
|
||||||
%tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1]
|
%tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1]
|
||||||
br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
|
br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
|
||||||
@ -30,7 +30,7 @@ cond_true13: ; preds = %cond_next
|
|||||||
br i1 %tmp22, label %cond_true25, label %cond_next32
|
br i1 %tmp22, label %cond_true25, label %cond_next32
|
||||||
|
|
||||||
cond_true25: ; preds = %cond_true13
|
cond_true25: ; preds = %cond_true13
|
||||||
%tmp29 = tail call fastcc %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) ; <%struct.rtx_def*> [#uses=1]
|
%tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) ; <%struct.rtx_def*> [#uses=1]
|
||||||
ret %struct.rtx_def* %tmp29
|
ret %struct.rtx_def* %tmp29
|
||||||
|
|
||||||
cond_next32: ; preds = %cond_true13, %cond_next
|
cond_next32: ; preds = %cond_true13, %cond_next
|
||||||
@ -56,7 +56,7 @@ cond_true47: ; preds = %bb
|
|||||||
%tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
|
%tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
|
||||||
%tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
|
%tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
|
||||||
%tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1]
|
%tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1]
|
||||||
%tmp58 = tail call fastcc %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1]
|
%tmp58 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1]
|
||||||
%tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1]
|
%tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1]
|
||||||
%tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1]
|
%tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1]
|
||||||
store i32 %tmp58.c, i32* %tmp62
|
store i32 %tmp58.c, i32* %tmp62
|
||||||
@ -79,7 +79,7 @@ bb73: ; preds = %bb73, %bb105.preheader
|
|||||||
%tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1]
|
%tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1]
|
||||||
%tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
|
%tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
|
||||||
%tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1]
|
%tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1]
|
||||||
%tmp98 = tail call fastcc %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1]
|
%tmp98 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1]
|
||||||
%tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1]
|
%tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1]
|
||||||
%tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1]
|
%tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1]
|
||||||
store i32 %tmp98.c, i32* %tmp101
|
store i32 %tmp98.c, i32* %tmp101
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
|
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
|
||||||
|
|
||||||
define fastcc %struct.quad_struct* @MakeTree(i32 %size, i32 %center_x, i32 %center_y, i32 %lo_proc, i32 %hi_proc, %struct.quad_struct* %parent, i32 %ct, i32 %level) nounwind {
|
define %struct.quad_struct* @MakeTree(i32 %size, i32 %center_x, i32 %center_y, i32 %lo_proc, i32 %hi_proc, %struct.quad_struct* %parent, i32 %ct, i32 %level) nounwind {
|
||||||
entry:
|
entry:
|
||||||
br i1 true, label %bb43.i, label %bb.i
|
br i1 true, label %bb43.i, label %bb.i
|
||||||
|
|
||||||
@ -39,12 +39,12 @@ bb34: ; preds = %CheckOutside.exit20.i
|
|||||||
%tmp15.reg2mem.0 = sdiv i32 %size, 2 ; <i32> [#uses=7]
|
%tmp15.reg2mem.0 = sdiv i32 %size, 2 ; <i32> [#uses=7]
|
||||||
%tmp85 = sub i32 %center_y, %tmp15.reg2mem.0 ; <i32> [#uses=2]
|
%tmp85 = sub i32 %center_y, %tmp15.reg2mem.0 ; <i32> [#uses=2]
|
||||||
%tmp88 = sub i32 %center_x, %tmp15.reg2mem.0 ; <i32> [#uses=2]
|
%tmp88 = sub i32 %center_x, %tmp15.reg2mem.0 ; <i32> [#uses=2]
|
||||||
%tmp92 = tail call fastcc %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp85, i32 0, i32 %hi_proc, %struct.quad_struct* null, i32 2, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
|
%tmp92 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp85, i32 0, i32 %hi_proc, %struct.quad_struct* null, i32 2, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
|
||||||
%tmp99 = add i32 0, %hi_proc ; <i32> [#uses=1]
|
%tmp99 = add i32 0, %hi_proc ; <i32> [#uses=1]
|
||||||
%tmp100 = sdiv i32 %tmp99, 2 ; <i32> [#uses=1]
|
%tmp100 = sdiv i32 %tmp99, 2 ; <i32> [#uses=1]
|
||||||
%tmp110 = tail call fastcc %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp85, i32 0, i32 %tmp100, %struct.quad_struct* null, i32 3, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
|
%tmp110 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp85, i32 0, i32 %tmp100, %struct.quad_struct* null, i32 3, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
|
||||||
%tmp122 = add i32 %tmp15.reg2mem.0, %center_y ; <i32> [#uses=2]
|
%tmp122 = add i32 %tmp15.reg2mem.0, %center_y ; <i32> [#uses=2]
|
||||||
%tmp129 = tail call fastcc %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp122, i32 0, i32 0, %struct.quad_struct* null, i32 1, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
|
%tmp129 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp122, i32 0, i32 0, %struct.quad_struct* null, i32 1, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
|
||||||
%tmp147 = tail call fastcc %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp122, i32 %lo_proc, i32 0, %struct.quad_struct* null, i32 0, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
|
%tmp147 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp122, i32 %lo_proc, i32 0, %struct.quad_struct* null, i32 0, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
|
||||||
unreachable
|
unreachable
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
|
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
|
||||||
|
|
||||||
define fastcc i32 @perimeter(%struct.quad_struct* %tree, i32 %size) nounwind {
|
define i32 @perimeter(%struct.quad_struct* %tree, i32 %size) nounwind {
|
||||||
entry:
|
entry:
|
||||||
switch i32 %size, label %UnifiedReturnBlock [
|
switch i32 %size, label %UnifiedReturnBlock [
|
||||||
i32 2, label %bb
|
i32 2, label %bb
|
||||||
@ -10,8 +10,8 @@ entry:
|
|||||||
]
|
]
|
||||||
|
|
||||||
bb: ; preds = %entry
|
bb: ; preds = %entry
|
||||||
%tmp31 = tail call fastcc i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind ; <i32> [#uses=1]
|
%tmp31 = tail call i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind ; <i32> [#uses=1]
|
||||||
%tmp40 = tail call fastcc i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind ; <i32> [#uses=1]
|
%tmp40 = tail call i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind ; <i32> [#uses=1]
|
||||||
%tmp33 = add i32 0, %tmp31 ; <i32> [#uses=1]
|
%tmp33 = add i32 0, %tmp31 ; <i32> [#uses=1]
|
||||||
%tmp42 = add i32 %tmp33, %tmp40 ; <i32> [#uses=1]
|
%tmp42 = add i32 %tmp33, %tmp40 ; <i32> [#uses=1]
|
||||||
ret i32 %tmp42
|
ret i32 %tmp42
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep fldl | count 1
|
||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 1
|
|
||||||
|
|
||||||
define fastcc double @doload64(i64 %x) nounwind {
|
define double @doload64(i64 %x) nounwind {
|
||||||
%tmp717 = bitcast i64 %x to double
|
%tmp717 = bitcast i64 %x to double
|
||||||
ret double %tmp717
|
ret double %tmp717
|
||||||
}
|
}
|
||||||
|
19
test/CodeGen/X86/fastcc.ll
Normal file
19
test/CodeGen/X86/fastcc.ll
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep mov | grep ecx | grep 0
|
||||||
|
; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep mov | grep xmm0 | grep 16
|
||||||
|
|
||||||
|
@d = external global double ; <double*> [#uses=1]
|
||||||
|
@c = external global double ; <double*> [#uses=1]
|
||||||
|
@b = external global double ; <double*> [#uses=1]
|
||||||
|
@a = external global double ; <double*> [#uses=1]
|
||||||
|
|
||||||
|
define i32 @foo() nounwind {
|
||||||
|
entry:
|
||||||
|
%0 = load double* @d, align 8 ; <double> [#uses=1]
|
||||||
|
%1 = load double* @c, align 8 ; <double> [#uses=1]
|
||||||
|
%2 = load double* @b, align 8 ; <double> [#uses=1]
|
||||||
|
%3 = load double* @a, align 8 ; <double> [#uses=1]
|
||||||
|
tail call fastcc void @bar( i32 0, i32 1, i32 2, double 1.000000e+00, double %3, double %2, double %1, double %0 ) nounwind
|
||||||
|
ret i32 0
|
||||||
|
}
|
||||||
|
|
||||||
|
declare fastcc void @bar(i32, i32, i32, double, double, double, double, double)
|
Loading…
Reference in New Issue
Block a user