[DAGCombiner] If a TokenFactor would be merged into its user, consider the user later.
Summary:
A number of optimizations are inhibited by single-use TokenFactors not
being merged into the TokenFactor using it. This makes us consider if
we can do the merge immediately.

Most test changes here are due to the change in visitation causing
minor reorderings and associated reassociation of paired memory
operations.

CodeGen tests with non-reordering changes:

  X86/aligned-variadic.ll -- memory-based add folded into stored leaq
  value.

  X86/constant-combiners.ll -- Optimizes out overlap between stores.

  X86/pr40631_deadstore_elision -- folds constant byte store into
  preceding quad word constant store.

Reviewers: RKSimon, craig.topper, spatel, efriedma, courbet

Reviewed By: courbet

Subscribers: dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, javed.absar, eraman, hiraditya, kbarton, jrtc27, atanasyan, jsji, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59260

llvm-svn: 356068
parent 268ff3b066
commit 6655b5e078
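Before the diff, a small hand-written sketch of the DAG shape this change targets (illustrative only; the node names t1/t2 and the chains ch0-ch2 are hypothetical, not taken from the patch):

; t1 is a TokenFactor whose sole user is another TokenFactor, t2:
;   t1: ch = TokenFactor ch0, ch1      ; single use: t2
;   t2: ch = TokenFactor t1, ch2
; Re-adding t2 to the worklist when t1 is visited gives the existing
; visitTokenFactor merge logic a chance to inline the single-use operand:
;   t2: ch = TokenFactor ch0, ch1, ch2
; With the chains joined under one TokenFactor, combines that scan a
; TokenFactor's operands (e.g. reassociation of paired memory operations)
; can see all three chains at once.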
@@ -1709,6 +1709,12 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // If this is used only a single token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;  // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
@@ -5,10 +5,10 @@ entry:
; CHECK: str x30, [sp, #-80]!
; CHECK: add x8, sp, #24
; CHECK: add x0, sp, #24
; CHECK: stp x6, x7, [sp, #64]
; CHECK: stp x4, x5, [sp, #48]
; CHECK: stp x2, x3, [sp, #32]
; CHECK: str x1, [sp, #24]
; CHECK: stp x1, x2, [sp, #24]
; CHECK: stp x3, x4, [sp, #40]
; CHECK: stp x5, x6, [sp, #56]
; CHECK: str x7, [sp, #72]
; CHECK: str x8, [sp, #8]
; CHECK: bl other_func
; CHECK: ldr x30, [sp], #80
@@ -44,7 +44,7 @@ entry:
; CHECK: sub sp, sp, #96
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; CHECK: str x1, [x29, #24]
; CHECK: stp x1, x2, [x29, #24]
; CHECK: add x1, x29, #8
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: add sp, sp, #96
@@ -78,22 +78,22 @@ define void @test_variadic_alloca(i64 %n, ...) {
; CHECK: stp x29, x30, [sp, #-16]!
; CHECK: mov x29, sp
; CHECK: sub sp, sp, #192
; CHECK: stp q6, q7, [x29, #-96]
; CHECK-DAG: stp q6, q7, [x29, #-96]
; [...]
; CHECK: stp q0, q1, [x29, #-192]
; CHECK-DAG: stp q0, q1, [x29, #-192]

; CHECK: stp x6, x7, [x29, #-16]
; CHECK-DAG: stp x5, x6, [x29, #-24]
; [...]
; CHECK: stp x2, x3, [x29, #-48]
; CHECK-DAG: stp x1, x2, [x29, #-56]

; CHECK-NOFP-ARM64: stp x29, x30, [sp, #-16]!
; CHECK-NOFP-ARM64: mov x29, sp
; CHECK-NOFP-ARM64: sub sp, sp, #64
; CHECK-NOFP-ARM64: stp x6, x7, [x29, #-16]
; CHECK-NOFP-ARM64-DAG: stp x5, x6, [x29, #-24]
; [...]
; CHECK-NOFP-ARM64: stp x4, x5, [x29, #-32]
; CHECK-NOFP-ARM64-DAG: stp x3, x4, [x29, #-40]
; [...]
; CHECK-NOFP-ARM64: stp x2, x3, [x29, #-48]
; CHECK-NOFP-ARM64-DAG: stp x1, x2, [x29, #-56]
; [...]
; CHECK-NOFP-ARM64: mov x8, sp
@@ -16,10 +16,10 @@
define i32 @t0() {
entry:
; CHECK-LABEL: t0:
; CHECK: ldur [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #7]
; CHECK: stur [[REG0]], [x[[BASEREG2:[0-9]+]], #7]
; CHECK: ldr [[REG2:x[0-9]+]],
; CHECK: str [[REG2]],
; CHECK-DAG: ldur [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #7]
; CHECK-DAG: stur [[REG0]], [x[[BASEREG2:[0-9]+]], #7]
; CHECK-DAG: ldr [[REG2:x[0-9]+]],
; CHECK-DAG: str [[REG2]],
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false)
ret i32 0
}
@@ -85,10 +85,10 @@ entry:
define void @t6() nounwind {
entry:
; CHECK-LABEL: t6:
; CHECK: ldur [[REG9:x[0-9]+]], [x{{[0-9]+}}, #6]
; CHECK: stur [[REG9]], [x{{[0-9]+}}, #6]
; CHECK: ldr
; CHECK: str
; CHECK-DAG: ldur [[REG9:x[0-9]+]], [x{{[0-9]+}}, #6]
; CHECK-DAG: stur [[REG9]], [x{{[0-9]+}}, #6]
; CHECK-DAG: ldr
; CHECK-DAG: str
call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false)
ret void
}
@@ -14,13 +14,13 @@ define void @test_simple(i32 %n, ...) {
; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var

; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]]
; CHECK-DAG: stp x6, x7, [sp, #
; ... omit middle ones ...
; CHECK: str x7, [sp, #
; CHECK-DAG: str x1, [sp, #[[GR_BASE:[0-9]+]]]

; CHECK: stp q0, q1, [sp]
; CHECK-DAG: stp q0, q1, [sp]
; ... omit middle ones ...
; CHECK: stp q6, q7, [sp, #
; CHECK-DAG: stp q6, q7, [sp, #

; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]

@@ -50,13 +50,13 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var

; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]]
; CHECK-DAG: stp x6, x7, [sp, #
; ... omit middle ones ...
; CHECK: str x7, [sp, #
; CHECK-DAG: str x3, [sp, #[[GR_BASE:[0-9]+]]]

; CHECK: stp q1, q2, [sp]
; CHECK-DAG: stp q6, q7, [sp, #80]
; ... omit middle ones ...
; CHECK: str q7, [sp, #
; CHECK-DAG: str q1, [sp]

; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
@@ -95,10 +95,13 @@ define void @test_nospare([8 x i64], [8 x float], ...) {
; __stack field should point just past them.
define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
; CHECK-LABEL: test_offsetstack:
; CHECK: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #-80]!
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
; CHECK: str [[STACK_TOP]], [x[[VAR]]]

; CHECK-DAG: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #48]
; CHECK-DAG: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #16]
; CHECK-DAG: str {{q[0-9]+}}, [sp]
; CHECK-DAG: add [[STACK_TOP:x[0-9]+]], sp, #96
; CHECK-DAG: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
; CHECK-DAG: str [[STACK_TOP]], [x[[VAR]]]

%addr = bitcast %va_list* @var to i8*
call void @llvm.va_start(i8* %addr)
@@ -5,10 +5,10 @@ entry:
; CHECK: str x30, [sp, #-80]!
; CHECK: add x8, sp, #24
; CHECK: add x0, sp, #24
; CHECK: stp x6, x7, [sp, #64]
; CHECK: stp x4, x5, [sp, #48]
; CHECK: stp x2, x3, [sp, #32]
; CHECK: str x1, [sp, #24]
; CHECK: stp x1, x2, [sp, #24]
; CHECK: stp x3, x4, [sp, #40]
; CHECK: stp x5, x6, [sp, #56]
; CHECK: str x7, [sp, #72]
; CHECK: str x8, [sp, #8]
; CHECK: bl other_func
; CHECK: ldr x30, [sp], #80
@@ -78,10 +78,10 @@ entry:
; CHECK-LABEL: copy1:
; CHECK: sub sp, sp, #80
; CHECK: add x8, sp, #24
; CHECK: stp x6, x7, [sp, #64]
; CHECK: stp x4, x5, [sp, #48]
; CHECK: stp x2, x3, [sp, #32]
; CHECK: str x1, [sp, #24]
; CHECK: stp x1, x2, [sp, #24]
; CHECK: stp x3, x4, [sp, #40]
; CHECK: stp x5, x6, [sp, #56]
; CHECK: str x7, [sp, #72]
; CHECK: stp x8, x8, [sp], #80
; CHECK: ret
define void @copy1(i64 %a0, ...) nounwind {
@@ -111,9 +111,9 @@ declare i64* @__local_stdio_printf_options() local_unnamed_addr #4
; CHECK: mov x19, x2
; CHECK: mov x20, x1
; CHECK: mov x21, x0
; CHECK: stp x6, x7, [x29, #48]
; CHECK: stp x4, x5, [x29, #32]
; CHECK: str x3, [x29, #24]
; CHECK: stp x3, x4, [x29, #24]
; CHECK: stp x5, x6, [x29, #40]
; CHECK: str x7, [x29, #56]
; CHECK: str x8, [sp, #8]
; CHECK: bl __local_stdio_printf_options
; CHECK: ldr x8, [x0]
@@ -162,9 +162,9 @@ attributes #6 = { "no-frame-pointer-elim"="true" }
; CHECK: lsr x15, x8, #4
; CHECK: mov x19, x1
; CHECK: mov [[REG2:x[0-9]+]], sp
; CHECK: stp x6, x7, [x29, #48]
; CHECK: stp x4, x5, [x29, #32]
; CHECK: stp x2, x3, [x29, #16]
; CHECK: stp x4, x5, [x29, #32]
; CHECK: stp x6, x7, [x29, #48]
; CHECK: bl __chkstk
; CHECK: mov x8, sp
; CHECK: sub [[REG:x[0-9]+]], x8, x15, lsl #4
@@ -219,9 +219,9 @@ declare void @llvm.stackrestore(i8*)
; CHECK-DAG: mov x19, x2
; CHECK-DAG: mov x20, x1
; CHECK-DAG: mov x21, x0
; CHECK-DAG: stp x6, x7, [sp, #80]
; CHECK-DAG: stp x4, x5, [sp, #64]
; CHECK-DAG: str x3, [sp, #56]
; CHECK-DAG: stp x3, x4, [sp, #56]
; CHECK-DAG: stp x5, x6, [sp, #72]
; CHECK-DAG: str x7, [sp, #88]
; CHECK-DAG: str x8, [sp, #8]
; CHECK-DAG: bl __local_stdio_printf_options
; CHECK-DAG: ldr x8, [x0]
@@ -752,8 +752,8 @@ entry:
; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:8
; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:8
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:8
; GCN: s_getpc_b64
; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
@@ -42,8 +42,8 @@ declare void @f(double);

; CHECK-LABEL: test_byval_8_bytes_alignment_fixed_arg:
; CHECK-NOT: str r1
; CHECK: str r3, [sp, #12]
; CHECK: str r2, [sp, #8]
; CHECK-DAG: str r3, [sp, #12]
; CHECK-DAG: str r2, [sp, #8]
; CHECK-NOT: str r1
define void @test_byval_8_bytes_alignment_fixed_arg(i32 %n1, %struct_t* byval %val) nounwind {
entry:
@@ -7,8 +7,8 @@
declare i32 @printf(i8*, ...)

; CHECK-LABEL: test_byval_usage_scheduling:
; CHECK: str r3, [sp, #12]
; CHECK: str r2, [sp, #8]
; CHECK-DAG: str r3, [sp, #12]
; CHECK-DAG: str r2, [sp, #8]
; CHECK: vldr d16, [sp, #8]
define void @test_byval_usage_scheduling(i32 %n1, i32 %n2, %struct_t* byval %val) nounwind {
entry:
@@ -35,8 +35,8 @@ define void @foo2(i32 %a, %struct8bytes8align* byval %b) {
; CHECK: sub sp, sp, #8
; CHECK: push {r11, lr}
; CHECK: add r0, sp, #8
; CHECK: str r3, [sp, #12]
; CHECK: str r2, [sp, #8]
; CHECK-DAG: str r3, [sp, #12]
; CHECK-DAG: str r2, [sp, #8]
; CHECK: bl usePtr
; CHECK: pop {r11, lr}
; CHECK: add sp, sp, #8
@@ -70,8 +70,8 @@ define void @foo4(%struct4bytes* byval %a, %struct8bytes8align* byval %b) {
; CHECK: push {r11, lr}
; CHECK: str r0, [sp, #8]
; CHECK: add r0, sp, #16
; CHECK: str r3, [sp, #20]
; CHECK: str r2, [sp, #16]
; CHECK-DAG: str r3, [sp, #20]
; CHECK-DAG: str r2, [sp, #16]
; CHECK: bl usePtr
; CHECK: pop {r11, lr}
; CHECK: add sp, sp, #16
@@ -25,12 +25,12 @@ entry:
; CHECK-7A: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2]
; CHECK-6M-LABEL: t2:
; CHECK-6M: movs [[REG:r[0-9]+]], #0
; CHECK-6M: str [[REG]], [sp, #20]
; CHECK-6M: str [[REG]], [sp, #16]
; CHECK-6M: str [[REG]], [sp, #12]
; CHECK-6M: str [[REG]], [sp, #8]
; CHECK-6M: str [[REG]], [sp, #4]
; CHECK-6M: str [[REG]], [sp]
; CHECK-6M-DAG: str [[REG]], [sp, #20]
; CHECK-6M-DAG: str [[REG]], [sp, #16]
; CHECK-6M-DAG: str [[REG]], [sp, #12]
; CHECK-6M-DAG: str [[REG]], [sp, #8]
; CHECK-6M-DAG: str [[REG]], [sp, #4]
; CHECK-6M-DAG: str [[REG]], [sp]
%buf = alloca [26 x i8], align 1
%0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i1 false)
@@ -57,14 +57,14 @@ entry:

; Epilogue
; --------
; CHECK-V4T: ldr [[POP:r[4567]]], [sp, #12]
; CHECK-V4T: ldr [[POP:r[4567]]], [sp, #16]
; CHECK-V4T-NEXT: mov lr, [[POP]]
; CHECK-V4T-NEXT: pop {[[SAVED]]}
; CHECK-V4T-NEXT: add sp, #16
; CHECK-V4T-NEXT: bx lr
; CHECK-V5T: lsls r4
; CHECK-V5T-NEXT: mov sp, r4
; CHECK-V5T: ldr [[POP:r[4567]]], [sp, #12]
; CHECK-V5T: ldr [[POP:r[4567]]], [sp, #16]
; CHECK-V5T-NEXT: mov lr, [[POP]]
; CHECK-V5T-NEXT: pop {[[SAVED]]}
; CHECK-V5T-NEXT: add sp, #16
@@ -13,14 +13,14 @@
define void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
; EXPANDED-LABEL: t:
; EXPANDED: ldrb [[R2:r[0-9]+]]
; EXPANDED: ldrb [[R3:r[0-9]+]]
; EXPANDED: ldrb [[R12:r[0-9]+]]
; EXPANDED: ldrb [[R1:r[0-9]+]]
; EXPANDED: strb [[R1]]
; EXPANDED: strb [[R12]]
; EXPANDED: strb [[R3]]
; EXPANDED: strb [[R2]]
; EXPANDED-DAG: ldrb [[R2:r[0-9]+]]
; EXPANDED-DAG: ldrb [[R3:r[0-9]+]]
; EXPANDED-DAG: ldrb [[R12:r[0-9]+]]
; EXPANDED-DAG: ldrb [[R1:r[0-9]+]]
; EXPANDED-DAG: strb [[R1]]
; EXPANDED-DAG: strb [[R12]]
; EXPANDED-DAG: strb [[R3]]
; EXPANDED-DAG: strb [[R2]]

; UNALIGNED-LABEL: t:
; UNALIGNED: ldr r1
@@ -66,24 +66,24 @@ define void @ret_void_args_i64(i64 %a) {

; CHECK-LABEL: ret_void_args_i64_i64
define void @ret_void_args_i64_i64(i64 %a, i64 %b) {
; CHECK: sts 11, r25
; CHECK-NEXT: sts 10, r24
; CHECK-NEXT: sts 9, r23
; CHECK-NEXT: sts 8, r22
; CHECK-NEXT: sts 7, r21
; CHECK-NEXT: sts 6, r20
; CHECK-NEXT: sts 5, r19
; CHECK-NEXT: sts 4, r18
; CHECK-DAG: sts 11, r25
; CHECK-DAG: sts 10, r24
; CHECK-DAG: sts 9, r23
; CHECK-DAG: sts 8, r22
; CHECK-DAG: sts 7, r21
; CHECK-DAG: sts 6, r20
; CHECK-DAG: sts 5, r19
; CHECK-DAG: sts 4, r18
store volatile i64 %a, i64* inttoptr (i64 4 to i64*)

; CHECK-NEXT: sts 11, r17
; CHECK-NEXT: sts 10, r16
; CHECK-NEXT: sts 9, r15
; CHECK-NEXT: sts 8, r14
; CHECK-NEXT: sts 7, r13
; CHECK-NEXT: sts 6, r12
; CHECK-NEXT: sts 5, r11
; CHECK-NEXT: sts 4, r10
; CHECK-DAG: sts 11, r17
; CHECK-DAG: sts 10, r16
; CHECK-DAG: sts 9, r15
; CHECK-DAG: sts 8, r14
; CHECK-DAG: sts 7, r13
; CHECK-DAG: sts 6, r12
; CHECK-DAG: sts 5, r11
; CHECK-DAG: sts 4, r10
store volatile i64 %b, i64* inttoptr (i64 4 to i64*)
ret void
}
@@ -207,10 +207,10 @@ define i32 @static32_inc() {
; CHECK: sbci r23, 255
; CHECK: sbci r24, 255
; CHECK: sbci r25, 255
; CHECK: sts long.static+3, r25
; CHECK: sts long.static+2, r24
; CHECK: sts long.static+1, r23
; CHECK: sts long.static, r22
; CHECK-DAG: sts long.static+3, r25
; CHECK-DAG: sts long.static+2, r24
; CHECK-DAG: sts long.static+1, r23
; CHECK-DAG: sts long.static, r22
%1 = load i32, i32* @long.static
%inc = add nsw i32 %1, 1
store i32 %inc, i32* @long.static
@@ -309,14 +309,14 @@ define i64 @static64_inc() {
; CHECK: sbci r23, 255
; CHECK: sbci r24, 255
; CHECK: sbci r25, 255
; CHECK: sts longlong.static+7, r25
; CHECK: sts longlong.static+6, r24
; CHECK: sts longlong.static+5, r23
; CHECK: sts longlong.static+4, r22
; CHECK: sts longlong.static+3, r21
; CHECK: sts longlong.static+2, r20
; CHECK: sts longlong.static+1, r19
; CHECK: sts longlong.static, r18
; CHECK-DAG: sts longlong.static+7, r25
; CHECK-DAG: sts longlong.static+6, r24
; CHECK-DAG: sts longlong.static+5, r23
; CHECK-DAG: sts longlong.static+4, r22
; CHECK-DAG: sts longlong.static+3, r21
; CHECK-DAG: sts longlong.static+2, r20
; CHECK-DAG: sts longlong.static+1, r19
; CHECK-DAG: sts longlong.static, r18
%1 = load i64, i64* @longlong.static
%inc = add nsw i64 %1, 1
store i64 %inc, i64* @longlong.static
@@ -20,20 +20,20 @@ define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 s
; CHECK: *(u64 *)(r10 - 8) = r1

; CHECK: r1 = 0
; CHECK: *(u16 *)(r10 + 24) = r1
; CHECK: *(u16 *)(r10 + 22) = r1
; CHECK: *(u16 *)(r10 + 20) = r1
; CHECK: *(u16 *)(r10 + 18) = r1
; CHECK: *(u16 *)(r10 + 16) = r1
; CHECK: *(u16 *)(r10 + 14) = r1
; CHECK: *(u16 *)(r10 + 12) = r1
; CHECK: *(u16 *)(r10 + 10) = r1
; CHECK: *(u16 *)(r10 + 8) = r1
; CHECK: *(u16 *)(r10 + 6) = r1
; CHECK: *(u16 *)(r10 + 4) = r1
; CHECK: *(u16 *)(r10 + 2) = r1
; CHECK: *(u16 *)(r10 + 0) = r1
; CHECK: *(u16 *)(r10 + 26) = r1
; CHECK-DAG: *(u16 *)(r10 + 24) = r1
; CHECK-DAG: *(u16 *)(r10 + 22) = r1
; CHECK-DAG: *(u16 *)(r10 + 20) = r1
; CHECK-DAG: *(u16 *)(r10 + 18) = r1
; CHECK-DAG: *(u16 *)(r10 + 16) = r1
; CHECK-DAG: *(u16 *)(r10 + 14) = r1
; CHECK-DAG: *(u16 *)(r10 + 12) = r1
; CHECK-DAG: *(u16 *)(r10 + 10) = r1
; CHECK-DAG: *(u16 *)(r10 + 8) = r1
; CHECK-DAG: *(u16 *)(r10 + 6) = r1
; CHECK-DAG: *(u16 *)(r10 + 4) = r1
; CHECK-DAG: *(u16 *)(r10 + 2) = r1
; CHECK-DAG: *(u16 *)(r10 + 0) = r1
; CHECK-DAG: *(u16 *)(r10 + 26) = r1

; CHECK: r2 = r10
; CHECK: r2 += -8
@@ -166,10 +166,10 @@ define void @f_i64_i64(i64 %a, i64 %b) #0 {
; CHECK: mov r13, &g_i64+2
; CHECK: mov r12, &g_i64
store volatile i64 %a, i64* @g_i64, align 2
; CHECK: mov 10(r4), &g_i64+6
; CHECK: mov 8(r4), &g_i64+4
; CHECK: mov 6(r4), &g_i64+2
; CHECK: mov 4(r4), &g_i64
; CHECK-DAG: mov 10(r4), &g_i64+6
; CHECK-DAG: mov 8(r4), &g_i64+4
; CHECK-DAG: mov 6(r4), &g_i64+2
; CHECK-DAG: mov 4(r4), &g_i64
store volatile i64 %b, i64* @g_i64, align 2
ret void
}
@@ -15,23 +15,19 @@ define float @f(<8 x i16>* %a) {
; CHECK-NEXT: .cfi_def_cfa_register 30
; CHECK-NEXT: addiu $1, $zero, -16
; CHECK-NEXT: and $sp, $sp, $1
; CHECK-NEXT: lw $1, 8($4)
; CHECK-NEXT: lw $2, 4($4)
; CHECK-NEXT: lw $3, 12($4)
; CHECK-NEXT: sw $3, 12($sp)
; CHECK-NEXT: sw $1, 8($sp)
; CHECK-NEXT: sw $2, 4($sp)
; CHECK-NEXT: lw $1, 0($4)
; CHECK-NEXT: sw $1, 0($sp)
; CHECK-NEXT: mtc1 $1, $f0
; CHECK-NEXT: lw $1, 12($4)
; CHECK-NEXT: lw $2, 0($4)
; CHECK-NEXT: lw $3, 8($4)
; CHECK-NEXT: sw $3, 8($sp)
; CHECK-NEXT: sw $1, 12($sp)
; CHECK-NEXT: sw $2, 0($sp)
; CHECK-NEXT: lw $1, 4($4)
; CHECK-NEXT: sw $1, 4($sp)
; CHECK-NEXT: mtc1 $2, $f0
; CHECK-NEXT: move $sp, $fp
; CHECK-NEXT: lw $fp, 28($sp) # 4-byte Folded Reload
; CHECK-NEXT: jr $ra
; CHECK-NEXT: addiu $sp, $sp, 32
; CHECK-NEXT: .set at
; CHECK-NEXT: .set macro
; CHECK-NEXT: .set reorder
; CHECK-NEXT: .end f
entry:
%m = alloca <8 x i16>
%0 = load <8 x i16>, <8 x i16>* %a
@@ -82,27 +82,27 @@ define fp128 @testStruct_03(%struct.With9fp128params* byval nocapture readonly
align 16 %a) {
; CHECK-LABEL: testStruct_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: std r10, 88(r1)
; CHECK-NEXT: std r9, 80(r1)
; CHECK-NEXT: std r8, 72(r1)
; CHECK-NEXT: std r7, 64(r1)
; CHECK-NEXT: std r6, 56(r1)
; CHECK-NEXT: std r5, 48(r1)
; CHECK-NEXT: std r4, 40(r1)
; CHECK-NEXT: std r3, 32(r1)
; CHECK-DAG: std r10, 88(r1)
; CHECK-DAG: std r9, 80(r1)
; CHECK-DAG: std r8, 72(r1)
; CHECK-DAG: std r7, 64(r1)
; CHECK-DAG: std r6, 56(r1)
; CHECK-DAG: std r5, 48(r1)
; CHECK-DAG: std r4, 40(r1)
; CHECK-DAG: std r3, 32(r1)
; CHECK-NEXT: lxv v2, 128(r1)
; CHECK-NEXT: blr

; CHECK-BE-LABEL: testStruct_03:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: std r10, 104(r1)
; CHECK-BE-NEXT: std r9, 96(r1)
; CHECK-BE-NEXT: std r8, 88(r1)
; CHECK-BE-NEXT: std r7, 80(r1)
; CHECK-BE-NEXT: std r6, 72(r1)
; CHECK-BE-NEXT: std r5, 64(r1)
; CHECK-BE-NEXT: std r4, 56(r1)
; CHECK-BE-NEXT: std r3, 48(r1)
; CHECK-BE-DAG: std r10, 104(r1)
; CHECK-BE-DAG: std r9, 96(r1)
; CHECK-BE-DAG: std r8, 88(r1)
; CHECK-BE-DAG: std r7, 80(r1)
; CHECK-BE-DAG: std r6, 72(r1)
; CHECK-BE-DAG: std r5, 64(r1)
; CHECK-BE-DAG: std r4, 56(r1)
; CHECK-BE-DAG: std r3, 48(r1)
; CHECK-BE-NEXT: lxv v2, 144(r1)
; CHECK-BE-NEXT: blr
entry:
@@ -256,27 +256,27 @@ entry:
define fp128 @testNestedAggregate(%struct.MixedC* byval nocapture readonly align 16 %a) {
; CHECK-LABEL: testNestedAggregate:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: std r8, 72(r1)
; CHECK-NEXT: std r7, 64(r1)
; CHECK-NEXT: std r10, 88(r1)
; CHECK-NEXT: std r9, 80(r1)
; CHECK-NEXT: std r6, 56(r1)
; CHECK-NEXT: std r5, 48(r1)
; CHECK-NEXT: std r4, 40(r1)
; CHECK-NEXT: std r3, 32(r1)
; CHECK-DAG: std r10, 88(r1)
; CHECK-DAG: std r9, 80(r1)
; CHECK-DAG: std r8, 72(r1)
; CHECK-DAG: std r7, 64(r1)
; CHECK-DAG: std r6, 56(r1)
; CHECK-DAG: std r5, 48(r1)
; CHECK-DAG: std r4, 40(r1)
; CHECK-DAG: std r3, 32(r1)
; CHECK-NEXT: lxv v2, 64(r1)
; CHECK-NEXT: blr

; CHECK-BE-LABEL: testNestedAggregate:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: std r8, 88(r1)
; CHECK-BE-NEXT: std r7, 80(r1)
; CHECK-BE-NEXT: std r10, 104(r1)
; CHECK-BE-NEXT: std r9, 96(r1)
; CHECK-BE-NEXT: std r6, 72(r1)
; CHECK-BE-NEXT: std r5, 64(r1)
; CHECK-BE-NEXT: std r4, 56(r1)
; CHECK-BE-NEXT: std r3, 48(r1)
; CHECK-BE-DAG: std r8, 88(r1)
; CHECK-BE-DAG: std r7, 80(r1)
; CHECK-BE-DAG: std r10, 104(r1)
; CHECK-BE-DAG: std r9, 96(r1)
; CHECK-BE-DAG: std r6, 72(r1)
; CHECK-BE-DAG: std r5, 64(r1)
; CHECK-BE-DAG: std r4, 56(r1)
; CHECK-BE-DAG: std r3, 48(r1)
; CHECK-BE-NEXT: lxv v2, 80(r1)
; CHECK-BE-NEXT: blr
entry:
@@ -337,17 +337,17 @@ entry:
define fp128 @sum_float128(i32 signext %count, ...) {
; CHECK-LABEL: sum_float128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: std r10, 88(r1)
; CHECK-NEXT: std r9, 80(r1)
; CHECK-NEXT: std r8, 72(r1)
; CHECK-NEXT: std r7, 64(r1)
; CHECK-NEXT: std r6, 56(r1)
; CHECK-NEXT: cmpwi cr0, r3, 1
; CHECK-NEXT: std r4, 40(r1)
; CHECK-NEXT: addis [[REG:r[0-9]+]], r2, .LCPI17_0@toc@ha
; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG]], .LCPI17_0@toc@l
; CHECK-NEXT: lxvx v2, 0, [[REG1]]
; CHECK-NEXT: std r5, 48(r1)
; CHECK-DAG: std r10, 88(r1)
; CHECK-DAG: std r9, 80(r1)
; CHECK-DAG: std r8, 72(r1)
; CHECK-DAG: std r7, 64(r1)
; CHECK-DAG: std r6, 56(r1)
; CHECK-DAG: std r4, 40(r1)
; CHECK-DAG: cmpwi cr0, r3, 1
; CHECK-DAG: std r5, 48(r1)
; CHECK-DAG: addis [[REG:r[0-9]+]], r2, .LCPI17_0@toc@ha
; CHECK-DAG: addi [[REG1:r[0-9]+]], [[REG]], .LCPI17_0@toc@l
; CHECK-DAG: lxvx v2, 0, [[REG1]]
; CHECK-NEXT: bltlr cr0
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: addi r3, r1, 40
@@ -34,7 +34,7 @@ entry:
ret i64 %0
}
; CHECK-LABEL: @callee2
; CHECK: ld 3, 128(1)
; CHECK: ld {{[0-9]+}}, 128(1)
; CHECK: blr

declare i64 @test2(%struct.pad* byval, i32 signext, %struct.test* byval align 16)
@@ -173,9 +173,9 @@ entry:
; Setup frame pointer
; CHECK: add r7, sp, #8
; Register varargs stored via FP
; CHECK: str r3, [r7, #16]
; CHECK-NEXT: str r2, [r7, #12]
; CHECK-NEXT: str r1, [r7, #8]
; CHECK-DAG: str r3, [r7, #16]
; CHECK-DAG: str r2, [r7, #12]
; CHECK-DAG: str r1, [r7, #8]

; Moving SP, access via SP
; int test_args_moving_sp(int a, int b, int c, int d, int e) {
@@ -194,26 +194,26 @@ for.cond.cleanup:

define void @test128(i128* %a) {
; CHECK-LABEL: test128:
; CHECK: ldr r1, [r0, #4]
; CHECK: ldr r1, [r0, #8]
; CHECK-NEXT: ldr r2, .LCPI8_0
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: str r2, [r0, #8]
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: ldr r2, .LCPI8_1
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: str r2, [r0]
; CHECK-NEXT: ldr r1, [r0, #8]
; CHECK-NEXT: ldr r1, [r0, #4]
; CHECK-NEXT: ldr r2, .LCPI8_2
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: str r2, [r0, #8]
; CHECK-NEXT: str r2, [r0, #4]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LCPI8_0:
; CHECK-NEXT: .long 4075008415
; CHECK-NEXT: .long 6692605
; CHECK-NEXT: .LCPI8_1:
; CHECK-NEXT: .long 2080661269
; CHECK-NEXT: .LCPI8_2:
; CHECK-NEXT: .long 6692605
; CHECK-NEXT: .long 4075008415
%x = load i128, i128* %a
%xn = xor i128 %x, 123456789123456789123456789
store i128 %xn, i128* %a
@@ -17,7 +17,7 @@ entry:
store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
; X32: leal 68(%esp), [[REG:%.*]]
; X32: movl [[REG]], 16(%esp)
; X64: leaq 232(%rsp), [[REG:%.*]]
; X64: leaq 256(%rsp), [[REG:%.*]]
; X64: movq [[REG]], 184(%rsp)
; X64: leaq 176(%rsp), %rdi
call void @qux(%struct.__va_list_tag* %arraydecay)
@@ -132,10 +132,10 @@ define i128 @or128(i128* %p) {
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl %edi, 12(%esi)
; X32-NEXT: movl %edx, 8(%esi)
; X32-NEXT: movl %ecx, 4(%esi)
; X32-NEXT: movl %edi, 8(%esi)
; X32-NEXT: movl %edx, 12(%esi)
; X32-NEXT: movl %eax, (%esi)
; X32-NEXT: movl %ecx, 4(%esi)
; X32-NEXT: movl %esi, %eax
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
@@ -245,8 +245,8 @@ define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups (%rsi), %xmm0
; CHECK-NEXT: vmovups 16(%rsi), %xmm1
; CHECK-NEXT: vmovups %xmm0, (%rdi)
; CHECK-NEXT: vmovups %xmm1, 16(%rdi)
; CHECK-NEXT: vmovups %xmm0, (%rdi)
; CHECK-NEXT: retq
;
; CHECK_O0-LABEL: add8i32:
@@ -290,8 +290,8 @@ define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rsi), %xmm0
; CHECK-NEXT: vmovaps 16(%rsi), %xmm1
; CHECK-NEXT: vmovaps %xmm0, (%rdi)
; CHECK-NEXT: vmovaps %xmm1, 16(%rdi)
; CHECK-NEXT: vmovaps %xmm0, (%rdi)
; CHECK-NEXT: retq
;
; CHECK_O0-LABEL: add4i64a16:
@@ -859,8 +859,8 @@ define void @btr_64_dont_fold(i64* %x, i64 %n) {
; X86-NEXT: .LBB33_2:
; X86-NEXT: notl %esi
; X86-NEXT: notl %edx
; X86-NEXT: andl %esi, 4(%eax)
; X86-NEXT: andl %edx, (%eax)
; X86-NEXT: andl %esi, 4(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
@@ -899,8 +899,8 @@ define void @bts_64_dont_fold(i64* %x, i64 %n) {
; X86-NEXT: movl %edx, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB34_2:
; X86-NEXT: orl %esi, 4(%eax)
; X86-NEXT: orl %edx, (%eax)
; X86-NEXT: orl %esi, 4(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
@@ -938,8 +938,8 @@ define void @btc_64_dont_fold(i64* %x, i64 %n) {
; X86-NEXT: movl %edx, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB35_2:
; X86-NEXT: xorl %esi, 4(%eax)
; X86-NEXT: xorl %edx, (%eax)
; X86-NEXT: xorl %esi, 4(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
@@ -77,8 +77,8 @@ define void @PR25858_i64(%WideUInt64* sret, %WideUInt64*, %WideUInt64*) nounwind
; X86-NEXT: movzbl %bl, %ecx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: sbbl $0, %ebp
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ebp, 12(%eax)
; X86-NEXT: popl %esi
@@ -19,7 +19,7 @@ define void @PR22524({ float, float }* %arg) {
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: mulss %xmm0, %xmm1
; CHECK-NEXT: movq $0, (%rdi)
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: movss %xmm1, 4(%rdi)
; CHECK-NEXT: retq
entry:
@@ -8,10 +8,10 @@ define void @add256(<16 x i32>* %a, <16 x i32>* %b, <16 x i32>* %c) "min-legal-v
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm0
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm1
; CHECK-NEXT: vpaddd (%rsi), %ymm0, %ymm0
; CHECK-NEXT: vpaddd 32(%rsi), %ymm1, %ymm1
; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx)
; CHECK-NEXT: vpaddd (%rsi), %ymm0, %ymm0
; CHECK-NEXT: vmovdqa %ymm0, (%rdx)
; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%d = load <16 x i32>, <16 x i32>* %a
@@ -85,10 +85,10 @@ define void @pmaddwd_32_256(<32 x i16>* %APtr, <32 x i16>* %BPtr, <16 x i32>* %C
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm0
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm1
; CHECK-NEXT: vpmaddwd (%rsi), %ymm0, %ymm0
; CHECK-NEXT: vpmaddwd 32(%rsi), %ymm1, %ymm1
; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx)
; CHECK-NEXT: vpmaddwd (%rsi), %ymm0, %ymm0
; CHECK-NEXT: vmovdqa %ymm0, (%rdx)
; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%A = load <32 x i16>, <32 x i16>* %APtr
@@ -128,10 +128,10 @@ define void @psubus_64i8_max_256(<64 x i8>* %xptr, <64 x i8>* %yptr, <64 x i8>*
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm0
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm1
; CHECK-NEXT: vpsubusb (%rsi), %ymm0, %ymm0
; CHECK-NEXT: vpsubusb 32(%rsi), %ymm1, %ymm1
; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx)
; CHECK-NEXT: vpsubusb (%rsi), %ymm0, %ymm0
; CHECK-NEXT: vmovdqa %ymm0, (%rdx)
; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%x = load <64 x i8>, <64 x i8>* %xptr
@@ -652,27 +652,27 @@ define void @mul256(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min-legal-vect
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm1
; CHECK-NEXT: vmovdqa (%rsi), %ymm2
; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; CHECK-NEXT: vpmullw %ymm4, %ymm5, %ymm4
; CHECK-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-NEXT: vpand %ymm5, %ymm4, %ymm4
; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vpand %ymm5, %ymm0, %ymm0
; CHECK-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; CHECK-NEXT: vpmullw %ymm2, %ymm4, %ymm2
; CHECK-NEXT: vpand %ymm5, %ymm2, %ymm2
; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
; CHECK-NEXT: vpmullw %ymm3, %ymm1, %ymm1
; CHECK-NEXT: vpand %ymm5, %ymm1, %ymm1
; CHECK-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx)
; CHECK-NEXT: vpackuswb %ymm4, %ymm1, %ymm1
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-NEXT: vpmullw %ymm3, %ymm4, %ymm3
; CHECK-NEXT: vpand %ymm5, %ymm3, %ymm3
; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vpand %ymm5, %ymm0, %ymm0
; CHECK-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
; CHECK-NEXT: vmovdqa %ymm0, (%rdx)
; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%d = load <64 x i8>, <64 x i8>* %a
@@ -56,11 +56,11 @@ define void @f_thunk(i8* %this, ...) {
; LINUX-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp)
; LINUX-NEXT: .LBB0_2:
; LINUX-NEXT: movq %r15, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movq %r12, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movq %r13, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movq %rbp, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movq %rbx, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movq %rbp, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movq %r13, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movq %r12, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movq %r15, {{[0-9]+}}(%rsp)
; LINUX-NEXT: leaq {{[0-9]+}}(%rsp), %rax
; LINUX-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; LINUX-NEXT: leaq {{[0-9]+}}(%rsp), %rax
@@ -150,11 +150,11 @@ define void @f_thunk(i8* %this, ...) {
; LINUX-X32-NEXT: movaps %xmm6, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: movaps %xmm7, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: .LBB0_2:
; LINUX-X32-NEXT: movq %r15, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: movq %r12, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: movq %r13, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: movq %rbp, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: movq %rbx, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: movq %rbp, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: movq %r13, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: movq %r12, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: movq %r15, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: leal {{[0-9]+}}(%rsp), %eax
; LINUX-X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: leal {{[0-9]+}}(%rsp), %eax
@@ -223,9 +223,9 @@ define void @f_thunk(i8* %this, ...) {
; WINDOWS-NEXT: movq %r8, %rdi
; WINDOWS-NEXT: movq %rdx, %rbx
; WINDOWS-NEXT: movq %rcx, %rbp
; WINDOWS-NEXT: movq %r9, {{[0-9]+}}(%rsp)
; WINDOWS-NEXT: movq %r8, {{[0-9]+}}(%rsp)
; WINDOWS-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; WINDOWS-NEXT: movq %r8, {{[0-9]+}}(%rsp)
; WINDOWS-NEXT: movq %r9, {{[0-9]+}}(%rsp)
; WINDOWS-NEXT: leaq {{[0-9]+}}(%rsp), %rax
; WINDOWS-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; WINDOWS-NEXT: callq get_f
@@ -46,8 +46,8 @@ define i32 @t4({}* %fn, i32 %n, i32 %r) {
; CHECK-LABEL: t4:
; CHECK: incl %[[r:.*]]
; CHECK: decl %[[n:.*]]
; CHECK: movl %[[r]], {{[0-9]+}}(%esp)
; CHECK: movl %[[n]], {{[0-9]+}}(%esp)
; CHECK-DAG: movl %[[r]], {{[0-9]+}}(%esp)
; CHECK-DAG: movl %[[n]], {{[0-9]+}}(%esp)
; CHECK: jmpl *%{{.*}}

entry:
@@ -71,8 +71,8 @@ define i32 @t5({}* %fn, i32 %n, i32 %r) alignstack(32) {
; CHECK: incl %[[r:.*]]
; CHECK: decl %[[n:.*]]
; Store them through ebp, since that's the only stable arg pointer.
; CHECK: movl %[[r]], {{[0-9]+}}(%ebp)
; CHECK: movl %[[n]], {{[0-9]+}}(%ebp)
; CHECK-DAG: movl %[[r]], {{[0-9]+}}(%ebp)
; CHECK-DAG: movl %[[n]], {{[0-9]+}}(%ebp)
; Epilogue.
; CHECK: leal {{[-0-9]+}}(%ebp), %esp
; CHECK: popl %esi
@@ -146,7 +146,7 @@ define void @sitofp_4i64_4f32_mem(<4 x i64>* %p0, <4 x float>* %p1) nounwind {
; X32-NEXT: subl $48, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movl 24(%eax), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 28(%eax), %ecx
; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: movl 16(%eax), %esi
@@ -163,7 +163,7 @@ define void @sitofp_4i64_4f32_mem(<4 x i64>* %p0, <4 x float>* %p1) nounwind {
; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movl 12(%ebp), %eax
; X32-NEXT: fildll {{[0-9]+}}(%esp)
@@ -277,10 +277,10 @@ define void @add_2i64_mem(<2 x i64>* %p0, <2 x i64>* %p1, <2 x i64>* %p2) nounwi
; X32-NEXT: adcl 4(%ecx), %edx
; X32-NEXT: addl 8(%ecx), %edi
; X32-NEXT: adcl 12(%ecx), %esi
; X32-NEXT: movl %esi, 12(%eax)
; X32-NEXT: movl %edi, 8(%eax)
; X32-NEXT: movl %edx, 4(%eax)
; X32-NEXT: movl %esi, 12(%eax)
; X32-NEXT: movl %ebx, (%eax)
; X32-NEXT: movl %edx, 4(%eax)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -1497,111 +1497,111 @@ define void @interleave_24i32_out(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2
define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2, <8 x i32>* %q3) nounwind {
; SSE2-LABEL: interleave_24i32_in:
; SSE2: # %bb.0:
; SSE2-NEXT: movups (%rsi), %xmm5
; SSE2-NEXT: movups 16(%rsi), %xmm8
; SSE2-NEXT: movups (%rdx), %xmm6
; SSE2-NEXT: movups 16(%rdx), %xmm3
; SSE2-NEXT: movups (%rcx), %xmm0
; SSE2-NEXT: movups 16(%rcx), %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm7
; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,0],xmm5[1,0]
; SSE2-NEXT: movaps %xmm5, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm7[0,2]
; SSE2-NEXT: movups (%rsi), %xmm1
; SSE2-NEXT: movups 16(%rsi), %xmm0
; SSE2-NEXT: movups (%rdx), %xmm8
; SSE2-NEXT: movups 16(%rdx), %xmm5
; SSE2-NEXT: movups (%rcx), %xmm3
; SSE2-NEXT: movups 16(%rcx), %xmm6
; SSE2-NEXT: movaps %xmm3, %xmm7
; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,0],xmm1[1,0]
; SSE2-NEXT: movaps %xmm1, %xmm9
; SSE2-NEXT: unpcklps {{.*#+}} xmm9 = xmm9[0],xmm8[0],xmm9[1],xmm8[1]
; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,1],xmm7[0,2]
; SSE2-NEXT: movaps %xmm5, %xmm7
; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[2,0],xmm6[2,1]
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm6[1,0]
; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[3,2],xmm6[3,2]
; SSE2-NEXT: movaps %xmm6, %xmm4
; SSE2-NEXT: unpckhps {{.*#+}} xmm4 = xmm4[2],xmm0[2],xmm4[3],xmm0[3]
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3],xmm7[0,2]
; SSE2-NEXT: movaps %xmm0, %xmm7
; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[2,0],xmm5[2,1]
; SSE2-NEXT: movaps %xmm6, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm5[1,0]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm7[0,2]
; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[3,2],xmm0[3,2]
; SSE2-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm6[0,2]
; SSE2-NEXT: movaps %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm8[1,0]
; SSE2-NEXT: movaps %xmm8, %xmm6
; SSE2-NEXT: unpcklps {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,1],xmm5[0,2]
; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,0],xmm0[1,0]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0,2]
; SSE2-NEXT: movaps %xmm8, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm3[2,1]
; SSE2-NEXT: movaps %xmm4, %xmm7
; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[1,0],xmm3[1,0]
; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[2,0],xmm5[0,2]
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,2],xmm4[3,2]
; SSE2-NEXT: unpckhps {{.*#+}} xmm4 = xmm4[2],xmm8[2],xmm4[3],xmm8[3]
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3],xmm3[0,2]
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[3,2],xmm3[3,2]
; SSE2-NEXT: movaps %xmm3, %xmm6
; SSE2-NEXT: unpckhps {{.*#+}} xmm6 = xmm6[2],xmm1[2],xmm6[3],xmm1[3]
; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,3],xmm5[0,2]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm8[2,1]
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm8[1,0]
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm1[0,2]
; SSE2-NEXT: movups %xmm3, 16(%rdi)
; SSE2-NEXT: movups %xmm6, 32(%rdi)
; SSE2-NEXT: movups %xmm0, 48(%rdi)
; SSE2-NEXT: movups %xmm2, 64(%rdi)
; SSE2-NEXT: movups %xmm4, 80(%rdi)
; SSE2-NEXT: movups %xmm7, 64(%rdi)
; SSE2-NEXT: movups %xmm6, 48(%rdi)
; SSE2-NEXT: movups %xmm0, 32(%rdi)
; SSE2-NEXT: movups %xmm2, 16(%rdi)
; SSE2-NEXT: movups %xmm1, (%rdi)
; SSE2-NEXT: movups %xmm9, (%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: interleave_24i32_in:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqu (%rsi), %xmm5
; SSE42-NEXT: movdqu 16(%rsi), %xmm2
; SSE42-NEXT: movdqu (%rdx), %xmm6
; SSE42-NEXT: movdqu 16(%rdx), %xmm1
; SSE42-NEXT: movdqu (%rcx), %xmm7
; SSE42-NEXT: movdqu 16(%rcx), %xmm4
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,1,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,1,0,1]
; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm0[2,3],xmm3[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,1,0,1]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5],xmm3[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,2,2]
; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm7[2,3],xmm3[4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm5[4,5],xmm3[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm7[2,3,2,3]
; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0,1],xmm5[2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm6[2,2,3,3]
; SSE42-NEXT: pblendw {{.*#+}} xmm5 = xmm7[0,1,2,3],xmm5[4,5],xmm7[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,1,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm2[0,1,0,1]
; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0,1],xmm6[2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,1,0,1]
; SSE42-NEXT: pblendw {{.*#+}} xmm6 = xmm7[0,1,2,3],xmm6[4,5],xmm7[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,2,2]
; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0,1],xmm4[2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0,1,2,3],xmm2[4,5],xmm7[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm2[2,3],xmm4[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm4[0,1,2,3],xmm1[4,5],xmm4[6,7]
; SSE42-NEXT: movdqu %xmm1, 80(%rdi)
; SSE42-NEXT: movdqu %xmm7, 64(%rdi)
; SSE42-NEXT: movdqu %xmm6, 48(%rdi)
; SSE42-NEXT: movdqu %xmm5, 32(%rdi)
; SSE42-NEXT: movdqu %xmm3, 16(%rdi)
; SSE42-NEXT: movdqu %xmm0, (%rdi)
; SSE42-NEXT: movdqu (%rsi), %xmm8
; SSE42-NEXT: movdqu 16(%rsi), %xmm4
; SSE42-NEXT: movdqu (%rdx), %xmm2
; SSE42-NEXT: movdqu 16(%rdx), %xmm5
; SSE42-NEXT: movdqu (%rcx), %xmm3
; SSE42-NEXT: movdqu 16(%rcx), %xmm6
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,1,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm8[0,1,0,1]
; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0,1],xmm1[2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,1,0,1]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm7[0,1,2,3],xmm1[4,5],xmm7[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,2,3]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm7[2,3],xmm0[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,2,3,3]
; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm0[0,1,2,3],xmm7[4,5],xmm0[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,2,2]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm6[2,3],xmm0[4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5],xmm0[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,0,1,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm5[2,3],xmm4[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm6[0,1,0,1]
; SSE42-NEXT: pblendw {{.*#+}} xmm5 = xmm4[0,1,2,3],xmm5[4,5],xmm4[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm4 = xmm8[2,3,0,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm3[2,3,2,3]
; SSE42-NEXT: pblendw {{.*#+}} xmm6 = xmm6[0,1],xmm4[2,3],xmm6[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,2,3,3]
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm6[0,1,2,3],xmm4[4,5],xmm6[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,2]
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm8[4,5],xmm2[6,7]
; SSE42-NEXT: movdqu %xmm2, 16(%rdi)
; SSE42-NEXT: movdqu %xmm4, 32(%rdi)
; SSE42-NEXT: movdqu %xmm5, 48(%rdi)
; SSE42-NEXT: movdqu %xmm0, 64(%rdi)
; SSE42-NEXT: movdqu %xmm7, 80(%rdi)
; SSE42-NEXT: movdqu %xmm1, (%rdi)
; SSE42-NEXT: retq
;
; AVX1-LABEL: interleave_24i32_in:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovupd (%rsi), %ymm0
; AVX1-NEXT: vmovupd (%rcx), %ymm1
; AVX1-NEXT: vmovups (%rdx), %xmm2
; AVX1-NEXT: vmovups 16(%rdx), %xmm3
; AVX1-NEXT: vmovups (%rsi), %xmm4
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm4[2,0],xmm2[2,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm2[1,1],xmm5[0,2]
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0],xmm4[0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[2,0],xmm4[2,1]
; AVX1-NEXT: vmovups 16(%rcx), %xmm2
; AVX1-NEXT: vmovups (%rdx), %xmm3
; AVX1-NEXT: vmovups 16(%rdx), %xmm4
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm4[3,0],xmm2[3,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm2[2,1],xmm5[0,2]
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[1,0],xmm4[1,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[2,0],xmm4[2,2]
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = xmm1[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3,4],ymm4[5],ymm2[6,7]
; AVX1-NEXT: vmovups 16(%rcx), %xmm4
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm3[3,0],xmm4[3,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm4[2,1],xmm5[0,2]
; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm4[1,0],xmm3[1,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm4[2,0],xmm3[2,2]
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3
; AVX1-NEXT: vpermilpd {{.*#+}} ymm4 = ymm0[1,1,3,3]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm4[2,3,2,3]
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3,4],ymm4[5],ymm2[6,7]
; AVX1-NEXT: vmovups (%rsi), %xmm4
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm4[2,0],xmm3[2,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm3[1,1],xmm5[0,2]
; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm3[0,0],xmm4[0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm3[2,0],xmm4[2,1]
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = xmm1[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1],ymm4[2],ymm3[3,4],ymm4[5],ymm3[6,7]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
@@ -1609,8 +1609,8 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2,
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = mem[0,0,3,3,4,4,7,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm3, 64(%rdi)
; AVX1-NEXT: vmovups %ymm2, (%rdi)
; AVX1-NEXT: vmovups %ymm3, (%rdi)
; AVX1-NEXT: vmovups %ymm2, 64(%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -1653,19 +1653,19 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2,
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2,3],ymm4[4],ymm3[5,6],ymm4[7]
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[0,3,3,3]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1],ymm4[2],ymm3[3,4],ymm4[5],ymm3[6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = [1,0,2,2,1,0,2,2]
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm4
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm5 = ymm0[0,0,2,1]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7]
; AVX2-FAST-NEXT: vbroadcastsd (%rcx), %ymm5
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7]
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[1,1,2,2]
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[1,1,2,2]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3],ymm0[4],ymm2[5,6],ymm0[7]
; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm4[1],ymm2[2,3],ymm4[4],ymm2[5,6],ymm4[7]
; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3,4],ymm4[5],ymm2[6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = [1,0,2,2,1,0,2,2]
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm1
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]
; AVX2-FAST-NEXT: vbroadcastsd (%rcx), %ymm1
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; AVX2-FAST-NEXT: vmovups %ymm0, 32(%rdi)
; AVX2-FAST-NEXT: vmovups %ymm4, (%rdi)
; AVX2-FAST-NEXT: vmovups %ymm0, (%rdi)
; AVX2-FAST-NEXT: vmovups %ymm2, 32(%rdi)
; AVX2-FAST-NEXT: vmovups %ymm3, 64(%rdi)
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
@@ -1674,32 +1674,32 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2,
; XOP: # %bb.0:
; XOP-NEXT: vmovupd (%rsi), %ymm0
; XOP-NEXT: vmovups (%rcx), %ymm1
; XOP-NEXT: vmovups (%rdx), %xmm2
; XOP-NEXT: vmovups 16(%rdx), %xmm3
; XOP-NEXT: vmovups (%rsi), %xmm4
; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm4[2,0],xmm2[2,0]
; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm2[1,1],xmm5[0,2]
; XOP-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0],xmm4[0,0]
; XOP-NEXT: vshufps {{.*#+}} xmm2 = xmm2[2,0],xmm4[2,1]
; XOP-NEXT: vmovups 16(%rcx), %xmm2
; XOP-NEXT: vmovups (%rdx), %xmm3
; XOP-NEXT: vmovups 16(%rdx), %xmm4
; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm4[3,0],xmm2[3,0]
; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm2[2,1],xmm5[0,2]
; XOP-NEXT: vshufps {{.*#+}} xmm2 = xmm2[1,0],xmm4[1,0]
; XOP-NEXT: vshufps {{.*#+}} xmm2 = xmm2[2,0],xmm4[2,2]
; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
; XOP-NEXT: vmovddup {{.*#+}} xmm4 = xmm1[0,0]
; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4
; XOP-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3,4],ymm4[5],ymm2[6,7]
; XOP-NEXT: vmovups 16(%rcx), %xmm4
; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm3[3,0],xmm4[3,0]
; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm4[2,1],xmm5[0,2]
; XOP-NEXT: vshufps {{.*#+}} xmm4 = xmm4[1,0],xmm3[1,0]
; XOP-NEXT: vshufps {{.*#+}} xmm3 = xmm4[2,0],xmm3[2,2]
; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3
; XOP-NEXT: vpermilpd {{.*#+}} ymm4 = ymm0[1,1,3,3]
; XOP-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm4[2,3,2,3]
; XOP-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3,4],ymm4[5],ymm2[6,7]
; XOP-NEXT: vmovups (%rsi), %xmm4
; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm4[2,0],xmm3[2,0]
; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm3[1,1],xmm5[0,2]
; XOP-NEXT: vshufps {{.*#+}} xmm3 = xmm3[0,0],xmm4[0,0]
; XOP-NEXT: vshufps {{.*#+}} xmm3 = xmm3[2,0],xmm4[2,1]
; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3
; XOP-NEXT: vmovddup {{.*#+}} xmm4 = xmm1[0,0]
; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4
; XOP-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1],ymm4[2],ymm3[3,4],ymm4[5],ymm3[6,7]
; XOP-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm1[2],ymm0[3],ymm1[2,3],ymm0[4],ymm1[5,4],ymm0[5]
; XOP-NEXT: vpermilps {{.*#+}} ymm1 = mem[0,0,3,3,4,4,7,7]
; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; XOP-NEXT: vmovups %ymm0, 32(%rdi)
; XOP-NEXT: vmovups %ymm3, 64(%rdi)
; XOP-NEXT: vmovups %ymm2, (%rdi)
; XOP-NEXT: vmovups %ymm3, (%rdi)
; XOP-NEXT: vmovups %ymm2, 64(%rdi)
; XOP-NEXT: vzeroupper
; XOP-NEXT: retq
%s1 = load <8 x i32>, <8 x i32>* %q1, align 4
@ -12,13 +12,12 @@ define i32 @ipt_do_table(%struct.sk_buff* noalias nocapture readonly) {
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $170, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [12297829382473034410,12297829382473034410]
; CHECK-NEXT: movaps %xmm0, (%rsp)
; CHECK-NEXT: movabsq $-6148914691236517206, %rcx # imm = 0xAAAAAAAAAAAAAAAA
; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb $-86, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movzwl 2(%rax), %ecx
; CHECK-NEXT: andl $8191, %ecx # imm = 0x1FFF
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
@ -572,8 +572,8 @@ define void @rotr1_64_mem(i64* %Aptr) nounwind {
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shldl $31, %ecx, %esi
; X86-NEXT: shldl $31, %edx, %ecx
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@ -244,32 +244,32 @@ define void @rotate_left_m64(i64 *%pa, i64 %b) {
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%eax), %esi
; X86-NEXT: movl 4(%eax), %ebx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shll %cl, %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: shldl %cl, %edx, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB6_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %esi, %edi
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB6_2:
; X86-NEXT: negb %cl
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: shrl %cl, %ebp
; X86-NEXT: shrdl %cl, %ebx, %edx
; X86-NEXT: shrdl %cl, %ebx, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB6_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebp, %edx
; X86-NEXT: movl %ebp, %esi
; X86-NEXT: xorl %ebp, %ebp
; X86-NEXT: .LBB6_4:
; X86-NEXT: orl %esi, %edx
; X86-NEXT: orl %ebp, %edi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
@ -336,10 +336,10 @@ define void @rotate_right_m64(i64 *%pa, i64 %b) {
; X86-NEXT: movl %ebp, %esi
; X86-NEXT: xorl %ebp, %ebp
; X86-NEXT: .LBB7_4:
; X86-NEXT: orl %esi, %edx
; X86-NEXT: orl %ebp, %edi
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: orl %esi, %edx
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
@ -460,10 +460,10 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vpaddsw (%rsi), %xmm0, %xmm0
; AVX1-NEXT: vpaddsw 16(%rsi), %xmm1, %xmm1
; AVX1-NEXT: vmovq %xmm1, 16(%rdx)
; AVX1-NEXT: vpaddsw (%rsi), %xmm0, %xmm0
; AVX1-NEXT: vmovdqa %xmm0, (%rdx)
; AVX1-NEXT: vmovq %xmm1, 16(%rdx)
; AVX1-NEXT: retq
;
; AVX2-LABEL: v12i16:
@ -144,8 +144,8 @@ define void @t5ptr(i64 %t, i64* %ptr) nounwind {
; X32-NEXT: movl %esi, %edx
; X32-NEXT: xorl %esi, %esi
; X32-NEXT: .LBB5_2:
; X32-NEXT: movl %esi, 4(%eax)
; X32-NEXT: movl %edx, (%eax)
; X32-NEXT: movl %esi, 4(%eax)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: retl
@ -746,18 +746,18 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
; X86-SSE-NEXT: pmulhuw %xmm0, %xmm4
; X86-SSE-NEXT: pmullw %xmm0, %xmm2
; X86-SSE-NEXT: movdqa %xmm2, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; X86-SSE-NEXT: movdqa %xmm3, %xmm4
; X86-SSE-NEXT: pmulhuw %xmm1, %xmm4
; X86-SSE-NEXT: pmullw %xmm1, %xmm3
; X86-SSE-NEXT: movdqa %xmm3, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; X86-SSE-NEXT: movdqu %xmm3, 48(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm1, 32(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm2, 16(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X86-SSE-NEXT: movdqu %xmm3, 32(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm1, 48(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm2, (%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm0, 16(%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@ -818,18 +818,18 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
; X64-SSE-NEXT: pmulhuw %xmm0, %xmm4
; X64-SSE-NEXT: pmullw %xmm0, %xmm2
; X64-SSE-NEXT: movdqa %xmm2, %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; X64-SSE-NEXT: movdqa %xmm3, %xmm4
; X64-SSE-NEXT: pmulhuw %xmm1, %xmm4
; X64-SSE-NEXT: pmullw %xmm1, %xmm3
; X64-SSE-NEXT: movdqa %xmm3, %xmm1
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; X64-SSE-NEXT: movdqu %xmm3, 48(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm1, 32(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm2, 16(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm0, (%rax,%rdx,4)
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X64-SSE-NEXT: movdqu %xmm3, 32(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm1, 48(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm2, (%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm0, 16(%rax,%rdx,4)
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: mul_16xi16:
@ -1262,18 +1262,18 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
; X86-SSE-NEXT: pmulhw %xmm0, %xmm4
; X86-SSE-NEXT: pmullw %xmm0, %xmm2
; X86-SSE-NEXT: movdqa %xmm2, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; X86-SSE-NEXT: movdqa %xmm3, %xmm4
; X86-SSE-NEXT: pmulhw %xmm1, %xmm4
; X86-SSE-NEXT: pmullw %xmm1, %xmm3
; X86-SSE-NEXT: movdqa %xmm3, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; X86-SSE-NEXT: movdqu %xmm3, 48(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm1, 32(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm2, 16(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X86-SSE-NEXT: movdqu %xmm3, 32(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm1, 48(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm2, (%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm0, 16(%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@ -1334,18 +1334,18 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
; X64-SSE-NEXT: pmulhw %xmm0, %xmm4
; X64-SSE-NEXT: pmullw %xmm0, %xmm2
; X64-SSE-NEXT: movdqa %xmm2, %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; X64-SSE-NEXT: movdqa %xmm3, %xmm4
; X64-SSE-NEXT: pmulhw %xmm1, %xmm4
; X64-SSE-NEXT: pmullw %xmm1, %xmm3
; X64-SSE-NEXT: movdqa %xmm3, %xmm1
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; X64-SSE-NEXT: movdqu %xmm3, 48(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm1, 32(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm2, 16(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm0, (%rax,%rdx,4)
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X64-SSE-NEXT: movdqu %xmm3, 32(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm1, 48(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm2, (%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm0, 16(%rax,%rdx,4)
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: mul_16xi16_sext:
@ -740,18 +740,18 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
; X86-SSE-NEXT: pmulhuw %xmm0, %xmm4
; X86-SSE-NEXT: pmullw %xmm0, %xmm2
; X86-SSE-NEXT: movdqa %xmm2, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; X86-SSE-NEXT: movdqa %xmm3, %xmm4
; X86-SSE-NEXT: pmulhuw %xmm1, %xmm4
; X86-SSE-NEXT: pmullw %xmm1, %xmm3
; X86-SSE-NEXT: movdqa %xmm3, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; X86-SSE-NEXT: movdqu %xmm3, 48(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm1, 32(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm2, 16(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X86-SSE-NEXT: movdqu %xmm3, 32(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm1, 48(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm2, (%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm0, 16(%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@ -812,18 +812,18 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
; X64-SSE-NEXT: pmulhuw %xmm0, %xmm4
; X64-SSE-NEXT: pmullw %xmm0, %xmm2
; X64-SSE-NEXT: movdqa %xmm2, %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; X64-SSE-NEXT: movdqa %xmm3, %xmm4
; X64-SSE-NEXT: pmulhuw %xmm1, %xmm4
; X64-SSE-NEXT: pmullw %xmm1, %xmm3
; X64-SSE-NEXT: movdqa %xmm3, %xmm1
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; X64-SSE-NEXT: movdqu %xmm3, 48(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm1, 32(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm2, 16(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm0, (%rax,%rdx,4)
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X64-SSE-NEXT: movdqu %xmm3, 32(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm1, 48(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm2, (%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm0, 16(%rax,%rdx,4)
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: mul_16xi16:
@ -1240,18 +1240,18 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
; X86-SSE-NEXT: pmulhw %xmm0, %xmm4
; X86-SSE-NEXT: pmullw %xmm0, %xmm2
; X86-SSE-NEXT: movdqa %xmm2, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; X86-SSE-NEXT: movdqa %xmm3, %xmm4
; X86-SSE-NEXT: pmulhw %xmm1, %xmm4
; X86-SSE-NEXT: pmullw %xmm1, %xmm3
; X86-SSE-NEXT: movdqa %xmm3, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; X86-SSE-NEXT: movdqu %xmm3, 48(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm1, 32(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm2, 16(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X86-SSE-NEXT: movdqu %xmm3, 32(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm1, 48(%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm2, (%esi,%ecx,4)
; X86-SSE-NEXT: movdqu %xmm0, 16(%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@ -1312,18 +1312,18 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
; X64-SSE-NEXT: pmulhw %xmm0, %xmm4
; X64-SSE-NEXT: pmullw %xmm0, %xmm2
; X64-SSE-NEXT: movdqa %xmm2, %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; X64-SSE-NEXT: movdqa %xmm3, %xmm4
; X64-SSE-NEXT: pmulhw %xmm1, %xmm4
; X64-SSE-NEXT: pmullw %xmm1, %xmm3
; X64-SSE-NEXT: movdqa %xmm3, %xmm1
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; X64-SSE-NEXT: movdqu %xmm3, 48(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm1, 32(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm2, 16(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm0, (%rax,%rdx,4)
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X64-SSE-NEXT: movdqu %xmm3, 32(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm1, 48(%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm2, (%rax,%rdx,4)
; X64-SSE-NEXT: movdqu %xmm0, 16(%rax,%rdx,4)
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: mul_16xi16_sext:
@ -460,10 +460,10 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vpsubsw (%rsi), %xmm0, %xmm0
; AVX1-NEXT: vpsubsw 16(%rsi), %xmm1, %xmm1
; AVX1-NEXT: vmovq %xmm1, 16(%rdx)
; AVX1-NEXT: vpsubsw (%rsi), %xmm0, %xmm0
; AVX1-NEXT: vmovdqa %xmm0, (%rdx)
; AVX1-NEXT: vmovq %xmm1, 16(%rdx)
; AVX1-NEXT: retq
;
; AVX2-LABEL: v12i16:
@ -460,10 +460,10 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vpaddusw (%rsi), %xmm0, %xmm0
; AVX1-NEXT: vpaddusw 16(%rsi), %xmm1, %xmm1
; AVX1-NEXT: vmovq %xmm1, 16(%rdx)
; AVX1-NEXT: vpaddusw (%rsi), %xmm0, %xmm0
; AVX1-NEXT: vmovdqa %xmm0, (%rdx)
; AVX1-NEXT: vmovq %xmm1, 16(%rdx)
; AVX1-NEXT: retq
;
; AVX2-LABEL: v12i16:
@ -460,10 +460,10 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vpsubusw (%rsi), %xmm0, %xmm0
; AVX1-NEXT: vpsubusw 16(%rsi), %xmm1, %xmm1
; AVX1-NEXT: vmovq %xmm1, 16(%rdx)
; AVX1-NEXT: vpsubusw (%rsi), %xmm0, %xmm0
; AVX1-NEXT: vmovdqa %xmm0, (%rdx)
; AVX1-NEXT: vmovq %xmm1, 16(%rdx)
; AVX1-NEXT: retq
;
; AVX2-LABEL: v12i16:
@ -21,11 +21,11 @@ define i32 @check_flag(i32 %flags, ...) nounwind {
; CHECK-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT: LBB0_2: ## %entry
; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testl $512, %edi ## imm = 0x200
; CHECK-NEXT: je LBB0_4
@ -186,14 +186,14 @@ define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) {
; X32-SSE: # %bb.0: # %entry
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X32-SSE-NEXT: cvtps2pd (%ecx), %xmm0 # encoding: [0x0f,0x5a,0x01]
; X32-SSE-NEXT: cvtps2pd 8(%ecx), %xmm1 # encoding: [0x0f,0x5a,0x49,0x08]
; X32-SSE-NEXT: cvtps2pd 16(%ecx), %xmm2 # encoding: [0x0f,0x5a,0x51,0x10]
; X32-SSE-NEXT: cvtps2pd 24(%ecx), %xmm3 # encoding: [0x0f,0x5a,0x59,0x18]
; X32-SSE-NEXT: movups %xmm3, 48(%eax) # encoding: [0x0f,0x11,0x58,0x30]
; X32-SSE-NEXT: movups %xmm2, 32(%eax) # encoding: [0x0f,0x11,0x50,0x20]
; X32-SSE-NEXT: movups %xmm1, 16(%eax) # encoding: [0x0f,0x11,0x48,0x10]
; X32-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
; X32-SSE-NEXT: cvtps2pd 8(%ecx), %xmm0 # encoding: [0x0f,0x5a,0x41,0x08]
; X32-SSE-NEXT: cvtps2pd (%ecx), %xmm1 # encoding: [0x0f,0x5a,0x09]
; X32-SSE-NEXT: cvtps2pd 24(%ecx), %xmm2 # encoding: [0x0f,0x5a,0x51,0x18]
; X32-SSE-NEXT: cvtps2pd 16(%ecx), %xmm3 # encoding: [0x0f,0x5a,0x59,0x10]
; X32-SSE-NEXT: movups %xmm3, 32(%eax) # encoding: [0x0f,0x11,0x58,0x20]
; X32-SSE-NEXT: movups %xmm2, 48(%eax) # encoding: [0x0f,0x11,0x50,0x30]
; X32-SSE-NEXT: movups %xmm1, (%eax) # encoding: [0x0f,0x11,0x08]
; X32-SSE-NEXT: movups %xmm0, 16(%eax) # encoding: [0x0f,0x11,0x40,0x10]
; X32-SSE-NEXT: retl # encoding: [0xc3]
;
; X32-AVX-LABEL: fpext_frommem8:
@ -218,14 +218,14 @@ define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) {
;
; X64-SSE-LABEL: fpext_frommem8:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # encoding: [0x0f,0x5a,0x07]
; X64-SSE-NEXT: cvtps2pd 8(%rdi), %xmm1 # encoding: [0x0f,0x5a,0x4f,0x08]
; X64-SSE-NEXT: cvtps2pd 16(%rdi), %xmm2 # encoding: [0x0f,0x5a,0x57,0x10]
; X64-SSE-NEXT: cvtps2pd 24(%rdi), %xmm3 # encoding: [0x0f,0x5a,0x5f,0x18]
; X64-SSE-NEXT: movups %xmm3, 48(%rsi) # encoding: [0x0f,0x11,0x5e,0x30]
; X64-SSE-NEXT: movups %xmm2, 32(%rsi) # encoding: [0x0f,0x11,0x56,0x20]
; X64-SSE-NEXT: movups %xmm1, 16(%rsi) # encoding: [0x0f,0x11,0x4e,0x10]
; X64-SSE-NEXT: movups %xmm0, (%rsi) # encoding: [0x0f,0x11,0x06]
; X64-SSE-NEXT: cvtps2pd 8(%rdi), %xmm0 # encoding: [0x0f,0x5a,0x47,0x08]
; X64-SSE-NEXT: cvtps2pd (%rdi), %xmm1 # encoding: [0x0f,0x5a,0x0f]
; X64-SSE-NEXT: cvtps2pd 24(%rdi), %xmm2 # encoding: [0x0f,0x5a,0x57,0x18]
; X64-SSE-NEXT: cvtps2pd 16(%rdi), %xmm3 # encoding: [0x0f,0x5a,0x5f,0x10]
; X64-SSE-NEXT: movups %xmm3, 32(%rsi) # encoding: [0x0f,0x11,0x5e,0x20]
; X64-SSE-NEXT: movups %xmm2, 48(%rsi) # encoding: [0x0f,0x11,0x56,0x30]
; X64-SSE-NEXT: movups %xmm1, (%rsi) # encoding: [0x0f,0x11,0x0e]
; X64-SSE-NEXT: movups %xmm0, 16(%rsi) # encoding: [0x0f,0x11,0x46,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: fpext_frommem8:
@ -21,9 +21,9 @@ define void @convert(<7 x i32>* %dst, <14 x i16>* %src) nounwind {
; CHECK-NEXT: movdqa 16(%edx,%eax), %xmm2
; CHECK-NEXT: psubw %xmm0, %xmm1
; CHECK-NEXT: psubw %xmm0, %xmm2
; CHECK-NEXT: pextrd $2, %xmm2, 24(%ecx,%eax)
; CHECK-NEXT: pextrd $1, %xmm2, 20(%ecx,%eax)
; CHECK-NEXT: movd %xmm2, 16(%ecx,%eax)
; CHECK-NEXT: pextrd $1, %xmm2, 20(%ecx,%eax)
; CHECK-NEXT: pextrd $2, %xmm2, 24(%ecx,%eax)
; CHECK-NEXT: movdqa %xmm1, (%ecx,%eax)
; CHECK-NEXT: incl (%esp)
; CHECK-NEXT: cmpl $3, (%esp)
@ -47,8 +47,8 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; X86-NEXT: pinsrd $1, 4(%ecx), %xmm1
; X86-NEXT: pinsrd $2, 8(%ecx), %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
; X86-NEXT: pextrd $2, %xmm1, 8(%eax)
; X86-NEXT: pextrd $1, %xmm1, 4(%eax)
; X86-NEXT: pextrd $2, %xmm1, 8(%eax)
; X86-NEXT: movd %xmm1, (%eax)
; X86-NEXT: retl $4
;
@ -81,9 +81,9 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddd (%ecx), %xmm0
; X86-NEXT: paddd 16(%ecx), %xmm1
; X86-NEXT: pextrd $2, %xmm1, 24(%eax)
; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: movd %xmm1, 16(%eax)
; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: pextrd $2, %xmm1, 24(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
@ -94,8 +94,8 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
; X64-NEXT: movdqa 16(%rsi), %xmm1
; X64-NEXT: paddd (%rdx), %xmm0
; X64-NEXT: paddd 16(%rdx), %xmm1
; X64-NEXT: pextrd $2, %xmm1, 24(%rdi)
; X64-NEXT: movq %xmm1, 16(%rdi)
; X64-NEXT: pextrd $2, %xmm1, 24(%rdi)
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
%a = load %i32vec7, %i32vec7* %ap, align 16
@ -116,10 +116,10 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
; X86-NEXT: movdqa (%edx), %xmm1
; X86-NEXT: movdqa 16(%edx), %xmm2
; X86-NEXT: paddd (%ecx), %xmm1
; X86-NEXT: paddd 16(%ecx), %xmm2
; X86-NEXT: paddd 32(%ecx), %xmm0
; X86-NEXT: movdqa %xmm0, 32(%eax)
; X86-NEXT: paddd 16(%ecx), %xmm2
; X86-NEXT: movdqa %xmm2, 16(%eax)
; X86-NEXT: movdqa %xmm0, 32(%eax)
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: retl $4
;
@ -130,10 +130,10 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
; X64-NEXT: movdqa 16(%rsi), %xmm1
; X64-NEXT: movdqa 32(%rsi), %xmm2
; X64-NEXT: paddd (%rdx), %xmm0
; X64-NEXT: paddd 16(%rdx), %xmm1
; X64-NEXT: paddd 32(%rdx), %xmm2
; X64-NEXT: movdqa %xmm2, 32(%rdi)
; X64-NEXT: paddd 16(%rdx), %xmm1
; X64-NEXT: movdqa %xmm1, 16(%rdi)
; X64-NEXT: movdqa %xmm2, 32(%rdi)
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
%a = load %i32vec12, %i32vec12* %ap, align 16
@ -225,8 +225,8 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddw (%ecx), %xmm0
; X86-NEXT: paddw 16(%ecx), %xmm1
; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: movd %xmm1, 16(%eax)
; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
@ -258,10 +258,10 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
; X86-NEXT: movdqa (%edx), %xmm1
; X86-NEXT: movdqa 16(%edx), %xmm2
; X86-NEXT: paddw (%ecx), %xmm1
; X86-NEXT: paddw 16(%ecx), %xmm2
; X86-NEXT: paddw 32(%ecx), %xmm0
; X86-NEXT: movd %xmm0, 32(%eax)
; X86-NEXT: paddw 16(%ecx), %xmm2
; X86-NEXT: movdqa %xmm2, 16(%eax)
; X86-NEXT: movd %xmm0, 32(%eax)
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: retl $4
;
@ -272,10 +272,10 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
; X64-NEXT: movdqa 16(%rsi), %xmm1
; X64-NEXT: movdqa 32(%rsi), %xmm2
; X64-NEXT: paddw (%rdx), %xmm0
; X64-NEXT: paddw 16(%rdx), %xmm1
; X64-NEXT: paddw 32(%rdx), %xmm2
; X64-NEXT: movd %xmm2, 32(%rdi)
; X64-NEXT: paddw 16(%rdx), %xmm1
; X64-NEXT: movdqa %xmm1, 16(%rdi)
; X64-NEXT: movd %xmm2, 32(%rdi)
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
%a = load %i16vec18, %i16vec18* %ap, align 16
@ -331,11 +331,11 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddb (%ecx), %xmm0
; X86-NEXT: paddb 16(%ecx), %xmm1
; X86-NEXT: pextrb $14, %xmm1, 30(%eax)
; X86-NEXT: pextrw $6, %xmm1, 28(%eax)
; X86-NEXT: pextrd $2, %xmm1, 24(%eax)
; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: movd %xmm1, 16(%eax)
; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: pextrd $2, %xmm1, 24(%eax)
; X86-NEXT: pextrw $6, %xmm1, 28(%eax)
; X86-NEXT: pextrb $14, %xmm1, 30(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
@ -346,10 +346,10 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
; X64-NEXT: movdqa 16(%rsi), %xmm1
; X64-NEXT: paddb (%rdx), %xmm0
; X64-NEXT: paddb 16(%rdx), %xmm1
; X64-NEXT: pextrb $14, %xmm1, 30(%rdi)
; X64-NEXT: pextrw $6, %xmm1, 28(%rdi)
; X64-NEXT: pextrd $2, %xmm1, 24(%rdi)
; X64-NEXT: movq %xmm1, 16(%rdi)
; X64-NEXT: pextrd $2, %xmm1, 24(%rdi)
; X64-NEXT: pextrw $6, %xmm1, 28(%rdi)
; X64-NEXT: pextrb $14, %xmm1, 30(%rdi)
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
%a = load %i8vec31, %i8vec31* %ap, align 16
@ -29,9 +29,9 @@ define void @f2(i32 %p, ...) "no-frame-pointer-elim"="true" {
; ALL-NEXT: movq %rsp, %rbp
; ALL-NEXT: .seh_setframe 5, 0
; ALL-NEXT: .seh_endprologue
; ALL-NEXT: movq %r9, 48(%rbp)
; ALL-NEXT: movq %r8, 40(%rbp)
; ALL-NEXT: movq %rdx, 32(%rbp)
; ALL-NEXT: movq %r8, 40(%rbp)
; ALL-NEXT: movq %r9, 48(%rbp)
; ALL-NEXT: leaq 32(%rbp), %rax
; ALL-NEXT: movq %rax, (%rbp)
; ALL-NEXT: addq $8, %rsp
@ -6,9 +6,9 @@
define void @average_va(i32 %count, ...) nounwind {
entry:
; CHECK: pushq
; CHECK: movq %r9, 40(%rsp)
; CHECK: movq %r8, 32(%rsp)
; CHECK: movq %rdx, 24(%rsp)
; CHECK-DAG: movq %r9, 40(%rsp)
; CHECK-DAG: movq %r8, 32(%rsp)
; CHECK-DAG: movq %rdx, 24(%rsp)
; CHECK: leaq 24(%rsp), %rax

%ap = alloca i8*, align 8 ; <i8**> [#uses=1]
@ -6,9 +6,9 @@
define win64cc void @average_va(i32 %count, ...) nounwind {
entry:
; CHECK: pushq
; CHECK: movq %r9, 40(%rsp)
; CHECK: movq %r8, 32(%rsp)
; CHECK: movq %rdx, 24(%rsp)
; CHECK-DAG: movq %r9, 40(%rsp)
; CHECK-DAG: movq %r8, 32(%rsp)
; CHECK-DAG: movq %rdx, 24(%rsp)
; CHECK: leaq 24(%rsp), %rax

%ap = alloca i8*, align 8 ; <i8**> [#uses=1]
@ -59,8 +59,8 @@ entry:

; CHECK-LABEL: copy1:
; CHECK: leaq 32(%rsp), [[REG_copy1:%[a-z]+]]
; CHECK: movq [[REG_copy1]], 8(%rsp)
; CHECK: movq [[REG_copy1]], (%rsp)
; CHECK-DAG: movq [[REG_copy1]], 8(%rsp)
; CHECK-DAG: movq [[REG_copy1]], (%rsp)
; CHECK: ret
define win64cc void @copy1(i64 %a0, ...) nounwind {
entry:
@ -39,8 +39,8 @@ entry:
; CHECK: extsp 4
; CHECK: stw lr, sp[1]
; CHECK: mov r11, r1
; CHECK: stw r2, sp[3]
; CHECK: stw r3, sp[4]
; CHECK-DAG: stw r2, sp[3]
; CHECK-DAG: stw r3, sp[4]
; CHECK: ldw r0, r0[0]
; CHECK: stw r0, sp[2]
; CHECK: ldaw r1, sp[2]