mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-23 04:52:54 +02:00
20a3d2184f
Summary: Local values are constants, global addresses, and stack addresses that can't be folded into the instruction that uses them. For example, when storing the address of a global variable into memory, we need to materialize that address into a register. FastISel doesn't want to materialize any given local value more than once, so it generates all local value materialization code at EmitStartPt, which always dominates the current insertion point. This allows it to maintain a map of local value registers, and it knows that the local value area will always dominate the current insertion point. The downside is that local value instructions are always emitted without a source location. This is done to prevent jumpy line tables, but it means that the local value area will be considered part of the previous statement. Consider this C code: call1(); // line 1 ++global; // line 2 ++global; // line 3 call2(&global, &local); // line 4 Today we end up with assembly and line tables like this: .loc 1 1 callq call1 leaq global(%rip), %rdi leaq local(%rsp), %rsi .loc 1 2 addq $1, global(%rip) .loc 1 3 addq $1, global(%rip) .loc 1 4 callq call2 The LEA instructions in the local value area have no source location and are treated as being on line 1. Stepping through the code in a debugger and correlating it with the assembly won't make much sense, because these materializations are only required for line 4. This is actually problematic for the VS debugger "set next statement" feature, which effectively assumes that there are no registers live across statement boundaries. By sinking the local value code into the statement and fixing up the source location, we can make that feature work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and https://crbug.com/793819. This change is obviously not enough to make this feature work reliably in all cases, but I felt that it was worth doing anyway because it usually generates smaller, more comprehensible -O0 code. 
I measured a 0.12% regression in code generation time with LLC on the sqlite3 amalgamation, so I think this is worth doing. There are some special cases worth calling out in the commit message: 1. local values materialized for phis 2. local values used by no-op casts 3. dead local value code Local values can be materialized for phis, and this does not show up as a vreg use in MachineRegisterInfo. In this case, if there are no other uses, this patch sinks the value to the first terminator, EH label, or the end of the BB if nothing else exists. Local values may also be used by no-op casts, which adds the register to the RegFixups table. Without reversing the RegFixups map direction, we don't have enough information to sink these instructions. Lastly, if the local value register has no other uses, we can delete it. This comes up when fastisel tries two instruction selection approaches and the first materializes the value but fails and the second succeeds without using the local value. Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya Differential Revision: https://reviews.llvm.org/D43093 llvm-svn: 327581
538 lines
20 KiB
LLVM
538 lines
20 KiB
LLVM
; RUN: llc < %s -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s
|
|
; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 -disable-fp-elim -fast-isel | FileCheck -check-prefix=FAST %s
|
|
|
|
; rdar://12648441
|
|
; Generated from arm64-arguments.c with -O2.
|
|
; Test passing structs with size < 8, < 16 and > 16
|
|
; with alignment of 16 and without
|
|
|
|
; Structs with size < 8
|
|
%struct.s38 = type { i32, i16 }

; s39 is s38 plus a [10 x i8] tail: with an alignment of 16, the size is
; padded to a multiple of 16 bytes.
%struct.s39 = type { i32, i16, [10 x i8] }

; Structs with size < 16 (s40 is used with align-4 globals, s41 with align-16
; globals; the field lists are identical).
%struct.s40 = type { i32, i16, i32, i16 }
%struct.s41 = type { i32, i16, i32, i16 }

; Structs with size > 16 (s43 is s42 plus a [10 x i8] tail).
%struct.s42 = type { i32, i16, i32, i16, i32, i16 }
%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }
|
|
|
|
; Two zero-initialized instances of every struct type; each caller below
; reads the pair that matches its struct. s38/s40/s42 instances are 4-byte
; aligned, s39/s41/s43 instances are 16-byte aligned.
@g38   = common global %struct.s38 zeroinitializer, align 4
@g38_2 = common global %struct.s38 zeroinitializer, align 4
@g39   = common global %struct.s39 zeroinitializer, align 16
@g39_2 = common global %struct.s39 zeroinitializer, align 16
@g40   = common global %struct.s40 zeroinitializer, align 4
@g40_2 = common global %struct.s40 zeroinitializer, align 4
@g41   = common global %struct.s41 zeroinitializer, align 16
@g41_2 = common global %struct.s41 zeroinitializer, align 16
@g42   = common global %struct.s42 zeroinitializer, align 4
@g42_2 = common global %struct.s42 zeroinitializer, align 4
@g43   = common global %struct.s43 zeroinitializer, align 16
@g43_2 = common global %struct.s43 zeroinitializer, align 16
|
|
|
|
; structs with size < 8 bytes, passed via i64 in x1 and x2
|
|
define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
entry:
; Each struct arrives coerced into one i64. The function returns
; %i + s1.int + s2.int + s1.short + s2.short, where the "int" is the low
; 32 bits and the "short" is bits 32..47 sign-extended to i32.
; CHECK-LABEL: f38
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w2
  %s1.int = trunc i64 %s1.coerce to i32
  %s1.hi = lshr i64 %s1.coerce, 32
  %s2.int = trunc i64 %s2.coerce to i32
  %s2.hi = lshr i64 %s2.coerce, 32
  ; shl 16 / trunc / ashr 16 sign-extends the 16-bit field of s1.
  %s1.short.shl = shl nuw nsw i64 %s1.hi, 16
  %s1.short.lo = trunc i64 %s1.short.shl to i32
  %s1.short = ashr exact i32 %s1.short.lo, 16
  ; Same sequence for s2.
  %s2.short.shl = shl nuw nsw i64 %s2.hi, 16
  %s2.short.lo = trunc i64 %s2.short.shl to i32
  %s2.short = ashr exact i32 %s2.short.lo, 16
  %sum1 = add i32 %s1.int, %i
  %sum2 = add i32 %sum1, %s2.int
  %sum3 = add i32 %sum2, %s1.short
  %sum4 = add i32 %sum3, %s2.short
  ret i32 %sum4
}
|
|
|
|
define i32 @caller38() #1 {
entry:
; Loads @g38 and @g38_2 as i64 values and tail-calls @f38 with them as the
; second and third arguments.
; CHECK-LABEL: caller38
; CHECK: ldr x1,
; CHECK: ldr x2,
  %s1.bits = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
  %s2.bits = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
  %result = tail call i32 @f38(i32 3, i64 %s1.bits, i64 %s2.bits) #5
  ret i32 %result
}
|
|
|
|
; Callee with nine leading i32 parameters followed by the two coerced structs.
declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9,
                       i64 %s1.coerce, i64 %s2.coerce) #0

; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
; i9 at [sp]
define i32 @caller38_stack() #1 {
entry:
; CHECK-LABEL: caller38_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %s1.bits = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
  %s2.bits = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
  %result = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                     i32 7, i32 8, i32 9,
                                     i64 %s1.bits, i64 %s2.bits) #5
  ret i32 %result
}
|
|
|
|
; structs with size < 8 bytes, alignment of 16
|
|
; passed via i128 in x1 and x3
|
|
define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
entry:
; Each struct arrives coerced into one i128. The function returns
; %i + s1.int + s2.int + s1.short + s2.short, where the "int" is the low
; 32 bits and the "short" is bits 32..47 sign-extended to i32.
; CHECK-LABEL: f39
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.int = trunc i128 %s1.coerce to i32
  %s1.hi = lshr i128 %s1.coerce, 32
  %s2.int = trunc i128 %s2.coerce to i32
  %s2.hi = lshr i128 %s2.coerce, 32
  ; shl 16 / trunc / ashr 16 sign-extends the 16-bit field of s1.
  %s1.short.shl = shl nuw nsw i128 %s1.hi, 16
  %s1.short.lo = trunc i128 %s1.short.shl to i32
  %s1.short = ashr exact i32 %s1.short.lo, 16
  ; Same sequence for s2.
  %s2.short.shl = shl nuw nsw i128 %s2.hi, 16
  %s2.short.lo = trunc i128 %s2.short.shl to i32
  %s2.short = ashr exact i32 %s2.short.lo, 16
  %sum1 = add i32 %s1.int, %i
  %sum2 = add i32 %sum1, %s2.int
  %sum3 = add i32 %sum2, %s1.short
  %sum4 = add i32 %sum3, %s2.short
  ret i32 %sum4
}
|
|
|
|
define i32 @caller39() #1 {
entry:
; Loads @g39 and @g39_2 as i128 values and tail-calls @f39 with them as the
; second and third arguments.
; CHECK-LABEL: caller39
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %s1.bits = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
  %s2.bits = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
  %result = tail call i32 @f39(i32 3, i128 %s1.bits, i128 %s2.bits) #5
  ret i32 %result
}
|
|
|
|
; Callee with nine leading i32 parameters followed by the two coerced structs.
declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9,
                       i128 %s1.coerce, i128 %s2.coerce) #0

; structs with size < 8 bytes, alignment 16
; passed on stack at [sp+16] and [sp+32]
define i32 @caller39_stack() #1 {
entry:
; CHECK-LABEL: caller39_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %s1.bits = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
  %s2.bits = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
  %result = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                     i32 7, i32 8, i32 9,
                                     i128 %s1.bits, i128 %s2.bits) #5
  ret i32 %result
}
|
|
|
|
; structs with size < 16 bytes
|
|
; passed via [2 x i64] in x1 and x3
|
|
define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
entry:
; Each struct arrives coerced into a [2 x i64]; only element 0 is read.
; The function returns %i + s1.int + s2.int + s1.short + s2.short, where the
; "int" is the low 32 bits of element 0 and the "short" is bits 32..47 of
; element 0 sign-extended to i32.
; CHECK-LABEL: f40
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.word0 = extractvalue [2 x i64] %s1.coerce, 0
  %s2.word0 = extractvalue [2 x i64] %s2.coerce, 0
  %s1.int = trunc i64 %s1.word0 to i32
  %s2.int = trunc i64 %s2.word0 to i32
  ; shl 16 / trunc / ashr 16 sign-extends the 16-bit field of s1.
  %s1.hi = lshr i64 %s1.word0, 32
  %s1.short.shl = shl nuw nsw i64 %s1.hi, 16
  %s1.short.lo = trunc i64 %s1.short.shl to i32
  %s1.short = ashr exact i32 %s1.short.lo, 16
  ; Same sequence for s2.
  %s2.hi = lshr i64 %s2.word0, 32
  %s2.short.shl = shl nuw nsw i64 %s2.hi, 16
  %s2.short.lo = trunc i64 %s2.short.shl to i32
  %s2.short = ashr exact i32 %s2.short.lo, 16
  %sum1 = add i32 %s1.int, %i
  %sum2 = add i32 %sum1, %s2.int
  %sum3 = add i32 %sum2, %s1.short
  %sum4 = add i32 %sum3, %s2.short
  ret i32 %sum4
}
|
|
|
|
define i32 @caller40() #1 {
entry:
; Loads @g40 and @g40_2 as [2 x i64] values and tail-calls @f40 with them as
; the second and third arguments.
; CHECK-LABEL: caller40
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %s1.words = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
  %s2.words = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
  %result = tail call i32 @f40(i32 3, [2 x i64] %s1.words, [2 x i64] %s2.words) #5
  ret i32 %result
}
|
|
|
|
; Callee with nine leading i32 parameters followed by the two coerced structs.
declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9,
                       [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0

; structs with size < 16 bytes
; passed on stack at [sp+8] and [sp+24]
define i32 @caller40_stack() #1 {
entry:
; CHECK-LABEL: caller40_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %s1.words = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
  %s2.words = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
  %result = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                     i32 7, i32 8, i32 9,
                                     [2 x i64] %s1.words, [2 x i64] %s2.words) #5
  ret i32 %result
}
|
|
|
|
; structs with size < 16 bytes, alignment of 16
|
|
; passed via i128 in x1 and x3
|
|
define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
entry:
; Each struct arrives coerced into one i128. The function returns
; %i + s1.int + s2.int + s1.short + s2.short, where the "int" is the low
; 32 bits and the "short" is bits 32..47 sign-extended to i32.
; CHECK-LABEL: f41
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.int = trunc i128 %s1.coerce to i32
  %s1.hi = lshr i128 %s1.coerce, 32
  %s2.int = trunc i128 %s2.coerce to i32
  %s2.hi = lshr i128 %s2.coerce, 32
  ; shl 16 / trunc / ashr 16 sign-extends the 16-bit field of s1.
  %s1.short.shl = shl nuw nsw i128 %s1.hi, 16
  %s1.short.lo = trunc i128 %s1.short.shl to i32
  %s1.short = ashr exact i32 %s1.short.lo, 16
  ; Same sequence for s2.
  %s2.short.shl = shl nuw nsw i128 %s2.hi, 16
  %s2.short.lo = trunc i128 %s2.short.shl to i32
  %s2.short = ashr exact i32 %s2.short.lo, 16
  %sum1 = add i32 %s1.int, %i
  %sum2 = add i32 %sum1, %s2.int
  %sum3 = add i32 %sum2, %s1.short
  %sum4 = add i32 %sum3, %s2.short
  ret i32 %sum4
}
|
|
|
|
define i32 @caller41() #1 {
entry:
; Loads @g41 and @g41_2 as i128 values and tail-calls @f41 with them as the
; second and third arguments.
; CHECK-LABEL: caller41
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %s1.bits = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %s2.bits = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
  %result = tail call i32 @f41(i32 3, i128 %s1.bits, i128 %s2.bits) #5
  ret i32 %result
}
|
|
|
|
; Callee with nine leading i32 parameters followed by the two coerced structs.
declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9,
                       i128 %s1.coerce, i128 %s2.coerce) #0

; structs with size < 16 bytes, alignment of 16
; passed on stack at [sp+16] and [sp+32]
define i32 @caller41_stack() #1 {
entry:
; CHECK-LABEL: caller41_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %s1.bits = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %s2.bits = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
  %result = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                     i32 7, i32 8, i32 9,
                                     i128 %s1.bits, i128 %s2.bits) #5
  ret i32 %result
}
|
|
|
|
; structs with size of 22 bytes, passed indirectly in x1 and x2
|
|
define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
entry:
; Both structs are passed by pointer. The function returns
; %i + s1->field0 + s2->field0 + sext(s1->field1) + sext(s2->field1).
; The function name is matched with a label directive for both prefixes so
; that each run's expectations are confined to this function's output, as
; is done for every other function in this file.
; CHECK-LABEL: f42
; CHECK: ldr w[[A:[0-9]+]], [x1]
; CHECK: ldr w[[B:[0-9]+]], [x2]
; CHECK: add w[[C:[0-9]+]], w[[A]], w0
; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
; FAST-LABEL: f42
; FAST: ldr w[[A:[0-9]+]], [x1]
; FAST: ldr w[[B:[0-9]+]], [x2]
; FAST: add w[[C:[0-9]+]], w[[A]], w0
; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
  ; i32 field 0 of each struct.
  %i1 = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 0
  %0 = load i32, i32* %i1, align 4, !tbaa !0
  %i2 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 0
  %1 = load i32, i32* %i2, align 4, !tbaa !0
  ; i16 field 1 of each struct, sign-extended to i32.
  %s = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 1
  %2 = load i16, i16* %s, align 2, !tbaa !3
  %conv = sext i16 %2 to i32
  %s5 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 1
  %3 = load i16, i16* %s5, align 2, !tbaa !3
  %conv6 = sext i16 %3 to i32
  %add = add i32 %0, %i
  %add3 = add i32 %add, %1
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}
|
|
|
|
; For s1, we allocate a 22-byte space, pass its address via x1
|
|
define i32 @caller42() #3 {
entry:
; Copies @g42 and @g42_2 (24 bytes each) into two stack temporaries and
; passes the addresses of the temporaries to @f42.
; CHECK-LABEL: caller42
; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; CHECK-DAG: str {{x[0-9]+}}, [sp, #16]
; CHECK-DAG: str {{q[0-9]+}}, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; In the optimized run s1 lives at sp+32 and s2 at sp.

; FAST-LABEL: caller42
; FAST: sub sp, sp, #96
; In the fast-isel run s1 lives at fp-24 = sp+56.
; FAST: sub x[[A:[0-9]+]], x29, #24
; memcpy is called with size = 24 (0x18).
; FAST: orr {{x[0-9]+}}, xzr, #0x18
; In the fast-isel run s2 lives at sp+32.
; FAST: add x[[A:[0-9]+]], sp, #32
; FAST: bl _memcpy
  %s1.copy = alloca %struct.s42, align 4
  %s2.copy = alloca %struct.s42, align 4
  %s1.raw = bitcast %struct.s42* %s1.copy to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(
      i8* align 4 %s1.raw,
      i8* align 4 bitcast (%struct.s42* @g42 to i8*),
      i64 24, i1 false), !tbaa.struct !4
  %s2.raw = bitcast %struct.s42* %s2.copy to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(
      i8* align 4 %s2.raw,
      i8* align 4 bitcast (%struct.s42* @g42_2 to i8*),
      i64 24, i1 false), !tbaa.struct !4
  %result = call i32 @f42(i32 3, %struct.s42* %s1.copy, %struct.s42* %s2.copy) #5
  ret i32 %result
}
|
|
|
|
; Intrinsic used by the by-pointer callers to copy the globals into stack
; temporaries.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #4

; Callee with nine leading i32 parameters followed by the two struct pointers.
declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9,
                       %struct.s42* nocapture %s1,
                       %struct.s42* nocapture %s2) #2
|
|
|
|
define i32 @caller42_stack() #3 {
entry:
; Copies @g42 and @g42_2 (24 bytes each) into two stack temporaries and
; passes their addresses to @f42_stack after nine i32 arguments.
; CHECK-LABEL: caller42_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
; CHECK-DAG: stur {{x[0-9]+}}, [x29, #-16]
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; In the optimized run s1 lives at x29-32 = sp+64 and s2 at sp+32.
; CHECK: add x[[B:[0-9]+]], sp, #32
; CHECK: str x[[B]], [sp, #16]
; CHECK: sub x[[A:[0-9]+]], x29, #32
; The address of s1 is passed on the stack at sp+8.
; CHECK: str x[[A]], [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]

; FAST-LABEL: caller42_stack
; In the fast-isel run s1 lives at fp-24.
; FAST: sub x[[A:[0-9]+]], x29, #24
; memcpy is called with size = 24 (0x18).
; FAST: orr {{x[0-9]+}}, xzr, #0x18
; FAST: bl _memcpy
; In the fast-isel run s2 lives at fp-48.
; FAST: sub x[[B:[0-9]+]], x29, #48
; memcpy is called a second time.
; FAST: bl _memcpy
; The address of s1 is passed on the stack at sp+8.
; FAST: str {{w[0-9]+}}, [sp]
; FAST: str {{x[0-9]+}}, [sp, #8]
; FAST: str {{x[0-9]+}}, [sp, #16]
  %s1.copy = alloca %struct.s42, align 4
  %s2.copy = alloca %struct.s42, align 4
  %s1.raw = bitcast %struct.s42* %s1.copy to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(
      i8* align 4 %s1.raw,
      i8* align 4 bitcast (%struct.s42* @g42 to i8*),
      i64 24, i1 false), !tbaa.struct !4
  %s2.raw = bitcast %struct.s42* %s2.copy to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(
      i8* align 4 %s2.raw,
      i8* align 4 bitcast (%struct.s42* @g42_2 to i8*),
      i64 24, i1 false), !tbaa.struct !4
  %result = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                i32 7, i32 8, i32 9,
                                %struct.s42* %s1.copy, %struct.s42* %s2.copy) #5
  ret i32 %result
}
|
|
|
|
; structs with size of 22 bytes, alignment of 16
|
|
; passed indirectly in x1 and x2
|
|
define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
entry:
; Both structs are passed by pointer. The function returns
; %i + s1->field0 + s2->field0 + sext(s1->field1) + sext(s2->field1).
; CHECK-LABEL: f43
; CHECK: ldr w[[A:[0-9]+]], [x1]
; CHECK: ldr w[[B:[0-9]+]], [x2]
; CHECK: add w[[C:[0-9]+]], w[[A]], w0
; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
; FAST-LABEL: f43
; FAST: ldr w[[A:[0-9]+]], [x1]
; FAST: ldr w[[B:[0-9]+]], [x2]
; FAST: add w[[C:[0-9]+]], w[[A]], w0
; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
  ; i32 field 0 of each struct.
  %s1.int.addr = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 0
  %s1.int = load i32, i32* %s1.int.addr, align 4, !tbaa !0
  %s2.int.addr = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 0
  %s2.int = load i32, i32* %s2.int.addr, align 4, !tbaa !0
  ; i16 field 1 of each struct, sign-extended to i32.
  %s1.short.addr = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 1
  %s1.short = load i16, i16* %s1.short.addr, align 2, !tbaa !3
  %s1.short.ext = sext i16 %s1.short to i32
  %s2.short.addr = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 1
  %s2.short = load i16, i16* %s2.short.addr, align 2, !tbaa !3
  %s2.short.ext = sext i16 %s2.short to i32
  %sum1 = add i32 %s1.int, %i
  %sum2 = add i32 %sum1, %s2.int
  %sum3 = add i32 %sum2, %s1.short.ext
  %sum4 = add i32 %sum3, %s2.short.ext
  ret i32 %sum4
}
|
|
|
|
define i32 @caller43() #3 {
entry:
; Copies @g43 and @g43_2 (32 bytes each) into two 16-byte-aligned stack
; temporaries and passes the addresses of the temporaries to @f43.
; CHECK-LABEL: caller43
; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #16]
; CHECK-DAG: str {{q[0-9]+}}, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; In the optimized run s1 lives at sp+32 and s2 at sp.

; FAST-LABEL: caller43
; FAST: add x29, sp, #64
; In the fast-isel run s1 lives at sp+32 and s2 at sp.
; FAST: str {{x[0-9]+}}, [sp, #32]
; FAST: str {{x[0-9]+}}, [sp, #40]
; FAST: str {{x[0-9]+}}, [sp, #48]
; FAST: str {{x[0-9]+}}, [sp, #56]
; FAST: str {{x[0-9]+}}, [sp]
; FAST: str {{x[0-9]+}}, [sp, #8]
; FAST: str {{x[0-9]+}}, [sp, #16]
; FAST: str {{x[0-9]+}}, [sp, #24]
; FAST: add x1, sp, #32
; FAST: mov x2, sp
  %s1.copy = alloca %struct.s43, align 16
  %s2.copy = alloca %struct.s43, align 16
  %s1.raw = bitcast %struct.s43* %s1.copy to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(
      i8* align 16 %s1.raw,
      i8* align 16 bitcast (%struct.s43* @g43 to i8*),
      i64 32, i1 false), !tbaa.struct !4
  %s2.raw = bitcast %struct.s43* %s2.copy to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(
      i8* align 16 %s2.raw,
      i8* align 16 bitcast (%struct.s43* @g43_2 to i8*),
      i64 32, i1 false), !tbaa.struct !4
  %result = call i32 @f43(i32 3, %struct.s43* %s1.copy, %struct.s43* %s2.copy) #5
  ret i32 %result
}
|
|
|
|
; Callee with nine leading i32 parameters followed by the two struct pointers.
declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9,
                       %struct.s43* nocapture %s1,
                       %struct.s43* nocapture %s2) #2

define i32 @caller43_stack() #3 {
entry:
; Copies @g43 and @g43_2 (32 bytes each) into two 16-byte-aligned stack
; temporaries and passes their addresses to @f43_stack after nine i32
; arguments.
; CHECK-LABEL: caller43_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16]
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; In the optimized run s1 lives at x29-32 = sp+64 and s2 at sp+32.
; CHECK: add x[[B:[0-9]+]], sp, #32
; CHECK: str x[[B]], [sp, #16]
; CHECK: sub x[[A:[0-9]+]], x29, #32
; The address of s1 is passed on the stack at sp+8.
; CHECK: str x[[A]], [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]

; FAST-LABEL: caller43_stack
; FAST: sub sp, sp, #112
; In the fast-isel run s1 lives at fp-32 = sp+64 and s2 at sp+32.
; FAST: stur {{x[0-9]+}}, [x29, #-32]
; FAST: stur {{x[0-9]+}}, [x29, #-24]
; FAST: stur {{x[0-9]+}}, [x29, #-16]
; FAST: stur {{x[0-9]+}}, [x29, #-8]
; FAST: str {{x[0-9]+}}, [sp, #32]
; FAST: str {{x[0-9]+}}, [sp, #40]
; FAST: str {{x[0-9]+}}, [sp, #48]
; FAST: str {{x[0-9]+}}, [sp, #56]
; FAST: str {{w[0-9]+}}, [sp]
; The address of s1 is passed on the stack at sp+8.
; FAST: sub x[[A:[0-9]+]], x29, #32
; FAST: str x[[A]], [sp, #8]
; FAST: add x[[B:[0-9]+]], sp, #32
; FAST: str x[[B]], [sp, #16]
  %s1.copy = alloca %struct.s43, align 16
  %s2.copy = alloca %struct.s43, align 16
  %s1.raw = bitcast %struct.s43* %s1.copy to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(
      i8* align 16 %s1.raw,
      i8* align 16 bitcast (%struct.s43* @g43 to i8*),
      i64 32, i1 false), !tbaa.struct !4
  %s2.raw = bitcast %struct.s43* %s2.copy to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(
      i8* align 16 %s2.raw,
      i8* align 16 bitcast (%struct.s43* @g43_2 to i8*),
      i64 32, i1 false), !tbaa.struct !4
  %result = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                i32 7, i32 8, i32 9,
                                %struct.s43* %s1.copy, %struct.s43* %s2.copy) #5
  ret i32 %result
}
|
|
|
|
; rdar://13668927
|
|
; Check that we don't split an i128.
|
|
; Callee taking seven i32 values, then an i128, then one more i32.
declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
                               i32 %i6, i32 %i7,
                               i128 %s1, i32 %i8)

define i32 @i128_split() {
entry:
; CHECK-LABEL: i128_split
; "i128 %0" should be on stack at [sp].
; "i32 8" should be on stack at [sp, #16].
; CHECK: str {{w[0-9]+}}, [sp, #16]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; FAST-LABEL: i128_split
; FAST: sub sp, sp
; FAST: mov x[[ADDR:[0-9]+]], sp
; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
; Load/Store opt is disabled with -O0, so the i128 is split.
; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
  %wide = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %result = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
                                             i32 6, i32 7,
                                             i128 %wide, i32 8) #5
  ret i32 %result
}
|
|
|
|
; Callee taking seven i32 values, then an i64, then one more i32.
declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
                        i32 %i6, i32 %i7,
                        i64 %s1, i32 %i8)

define i32 @i64_split() {
entry:
; CHECK-LABEL: i64_split
; "i64 %0" should be in register x7.
; "i32 8" should be on stack at [sp].
; CHECK: ldr x7, [{{x[0-9]+}}]
; CHECK: str {{w[0-9]+}}, [sp]
; FAST-LABEL: i64_split
; FAST: ldr x7, [{{x[0-9]+}}]
; FAST: mov x[[R0:[0-9]+]], sp
; FAST: orr w[[R1:[0-9]+]], wzr, #0x8
; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
  %word = load i64, i64* bitcast (%struct.s41* @g41 to i64*), align 16
  %result = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
                                      i32 6, i32 7,
                                      i64 %word, i32 8) #5
  ret i32 %result
}
|
|
|
|
; #0: the by-value callees (f38..f41) and their *_stack declarations.
attributes #0 = {
  noinline nounwind readnone
  "fp-contract-model"="standard"
  "relocation-model"="pic"
  "ssp-buffers-size"="8"
}

; #1: the callers of the by-value callees (caller38..caller41 and *_stack).
attributes #1 = {
  nounwind readonly
  "fp-contract-model"="standard"
  "relocation-model"="pic"
  "ssp-buffers-size"="8"
}

; #2: the by-pointer callees (f42, f43) and their *_stack declarations.
attributes #2 = {
  noinline nounwind readonly
  "fp-contract-model"="standard"
  "relocation-model"="pic"
  "ssp-buffers-size"="8"
}

; #3: the callers of the by-pointer callees (caller42, caller43 and *_stack).
attributes #3 = {
  nounwind
  "fp-contract-model"="standard"
  "relocation-model"="pic"
  "ssp-buffers-size"="8"
}

; #4: the llvm.memcpy declaration.
attributes #4 = { nounwind }

; #5: applied at the call sites of the test callees.
attributes #5 = { nobuiltin }
|
|
|
|
; TBAA type nodes: "int" (!0) and "short" (!3) both have "omnipotent char"
; (!1) as parent, which in turn hangs off the root node !2.
!0 = !{!"int", !1}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!"short", !1}

; Operand list for the !tbaa.struct attachments on the memcpy calls, laid out
; one (i64, i64, node) triple per line. Presumably each triple is
; (offset, size, TBAA tag) as described for tbaa.struct in the LLVM Language
; Reference - confirm there.
!4 = !{i64 0,  i64 4, !0,
       i64 4,  i64 2, !3,
       i64 8,  i64 4, !0,
       i64 12, i64 2, !3,
       i64 16, i64 4, !0,
       i64 20, i64 2, !3}
|