mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
Re-land MachineInstr: Reason locally about some memory objects before going to AA.
Summary: Reverts r311008 to reinstate r310825 with a fix. Refine alias checking for pseudo vs value to be conservative. This fixes the original failure in buildbot unittest SingleSource/UnitTests/2003-07-09-SignedArgs. Reviewers: hfinkel, nemanjai, efriedma Reviewed By: efriedma Subscribers: bjope, mcrosier, nhaehnle, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D36900 llvm-svn: 312126
This commit is contained in:
parent
3d0c6d78ae
commit
7259c5a50d
@ -1663,6 +1663,7 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
|
||||
bool UseTBAA) {
|
||||
const MachineFunction *MF = getParent()->getParent();
|
||||
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
|
||||
const MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||
|
||||
// If neither instruction stores to memory, they can't alias in any
|
||||
// meaningful way, even if they read from the same address.
|
||||
@ -1673,9 +1674,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
|
||||
if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA))
|
||||
return false;
|
||||
|
||||
if (!AA)
|
||||
return true;
|
||||
|
||||
// FIXME: Need to handle multiple memory operands to support all targets.
|
||||
if (!hasOneMemOperand() || !Other.hasOneMemOperand())
|
||||
return true;
|
||||
@ -1683,9 +1681,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
|
||||
MachineMemOperand *MMOa = *memoperands_begin();
|
||||
MachineMemOperand *MMOb = *Other.memoperands_begin();
|
||||
|
||||
if (!MMOa->getValue() || !MMOb->getValue())
|
||||
return true;
|
||||
|
||||
// The following interface to AA is fashioned after DAGCombiner::isAlias
|
||||
// and operates with MachineMemOperand offset with some important
|
||||
// assumptions:
|
||||
@ -1698,22 +1693,53 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
|
||||
// - There should never be any negative offsets here.
|
||||
//
|
||||
// FIXME: Modify API to hide this math from "user"
|
||||
// FIXME: Even before we go to AA we can reason locally about some
|
||||
// Even before we go to AA we can reason locally about some
|
||||
// memory objects. It can save compile time, and possibly catch some
|
||||
// corner cases not currently covered.
|
||||
|
||||
assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
|
||||
assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
|
||||
int64_t OffsetA = MMOa->getOffset();
|
||||
int64_t OffsetB = MMOb->getOffset();
|
||||
|
||||
int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
|
||||
int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
|
||||
int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
|
||||
int64_t MinOffset = std::min(OffsetA, OffsetB);
|
||||
int64_t WidthA = MMOa->getSize();
|
||||
int64_t WidthB = MMOb->getSize();
|
||||
const Value *ValA = MMOa->getValue();
|
||||
const Value *ValB = MMOb->getValue();
|
||||
bool SameVal = (ValA && ValB && (ValA == ValB));
|
||||
if (!SameVal) {
|
||||
const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
|
||||
const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
|
||||
if (PSVa && ValB && !PSVa->mayAlias(&MFI))
|
||||
return false;
|
||||
if (PSVb && ValA && !PSVb->mayAlias(&MFI))
|
||||
return false;
|
||||
if (PSVa && PSVb && (PSVa == PSVb))
|
||||
SameVal = true;
|
||||
}
|
||||
|
||||
AliasResult AAResult =
|
||||
AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
|
||||
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
|
||||
MemoryLocation(MMOb->getValue(), Overlapb,
|
||||
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
|
||||
if (SameVal) {
|
||||
int64_t MaxOffset = std::max(OffsetA, OffsetB);
|
||||
int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
|
||||
return (MinOffset + LowWidth > MaxOffset);
|
||||
}
|
||||
|
||||
if (!AA)
|
||||
return true;
|
||||
|
||||
if (!ValA || !ValB)
|
||||
return true;
|
||||
|
||||
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
|
||||
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
|
||||
|
||||
int64_t Overlapa = WidthA + OffsetA - MinOffset;
|
||||
int64_t Overlapb = WidthB + OffsetB - MinOffset;
|
||||
|
||||
AliasResult AAResult = AA->alias(
|
||||
MemoryLocation(ValA, Overlapa,
|
||||
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
|
||||
MemoryLocation(ValB, Overlapb,
|
||||
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
|
||||
|
||||
return (AAResult != NoAlias);
|
||||
}
|
||||
|
@ -130,11 +130,11 @@ define void @check_i128_align() {
|
||||
i32 42, i128 %val)
|
||||
; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
|
||||
; CHECK: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
|
||||
; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16]
|
||||
; CHECK: stp [[I128HI]], {{x[0-9]+}}, [sp, #24]
|
||||
|
||||
; CHECK-NONEON: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
|
||||
; CHECK-NONEON: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
|
||||
; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16]
|
||||
; CHECK-NONEON: stp [[I128HI]], {{x[0-9]+}}, [sp, #24]
|
||||
; CHECK: bl check_i128_stackalign
|
||||
|
||||
call void @check_i128_regalign(i32 0, i128 42)
|
||||
|
@ -1531,7 +1531,7 @@ define void @merge_zr64_unalign(<2 x i64>* %p) {
|
||||
; CHECK-LABEL: merge_zr64_unalign:
|
||||
; CHECK: // %entry
|
||||
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
|
||||
; STRICTALIGN: strb wzr,
|
||||
; STRICTALIGN: strb
|
||||
; STRICTALIGN: strb
|
||||
; STRICTALIGN: strb
|
||||
; STRICTALIGN: strb
|
||||
|
@ -452,15 +452,15 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
|
||||
; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8
|
||||
; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12
|
||||
|
||||
; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8
|
||||
; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4
|
||||
; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8
|
||||
|
||||
|
||||
; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], s33 offset:8
|
||||
; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], s33 offset:12
|
||||
|
||||
; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8
|
||||
; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4
|
||||
; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8
|
||||
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200
|
||||
@ -487,8 +487,8 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
|
||||
; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12
|
||||
|
||||
; GCN-DAG: s_add_u32 [[SP]], [[SP]], 0x200
|
||||
; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8
|
||||
; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
|
||||
; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16
|
||||
; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20
|
||||
|
@ -179,8 +179,8 @@ define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)
|
||||
; GCN-NOHSA: buffer_load_dwordx2
|
||||
; GCN-HSA: flat_load_dwordx2
|
||||
|
||||
; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
|
||||
; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
|
||||
; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
|
||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}},
|
||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}},
|
||||
; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1
|
||||
@ -188,8 +188,6 @@ define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)
|
||||
; TODO: This should use DST, but for some there are redundant MOVs
|
||||
; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
|
||||
; EGCM: 16
|
||||
; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal
|
||||
; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal
|
||||
define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
|
||||
entry:
|
||||
%ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
|
||||
@ -202,8 +200,8 @@ entry:
|
||||
; GCN-NOHSA: buffer_load_dwordx2
|
||||
; GCN-HSA: flat_load_dwordx2
|
||||
|
||||
; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
|
||||
; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
|
||||
; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
|
||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}},
|
||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}},
|
||||
; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 0, #1
|
||||
|
@ -352,22 +352,22 @@ define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(
|
||||
|
||||
; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
|
||||
; TODO: These should use DST, but for some there are redundant MOVs
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
|
@ -530,7 +530,6 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(
|
||||
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
|
||||
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
||||
; EG-DAG: LDS_WRITE
|
||||
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
|
||||
define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
|
||||
%a = load i16, i16 addrspace(3)* %in
|
||||
%ext = zext i16 %a to i64
|
||||
@ -572,7 +571,6 @@ define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16
|
||||
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
|
||||
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
|
||||
; EG-DAG: LDS_WRITE
|
||||
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
|
||||
define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
|
||||
%load = load <1 x i16>, <1 x i16> addrspace(3)* %in
|
||||
%ext = zext <1 x i16> %load to <1 x i64>
|
||||
|
@ -1,13 +1,15 @@
|
||||
; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s --check-prefix=NOREGALLOC
|
||||
; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s --check-prefix=REGALLOC
|
||||
|
||||
@.str = private constant [1 x i8] zeroinitializer, align 1
|
||||
|
||||
define void @g() {
|
||||
entry:
|
||||
;CHECK: [sp, #8]
|
||||
;CHECK: [sp, #12]
|
||||
;CHECK: [sp]
|
||||
;NOREGALLOC: [sp, #12]
|
||||
;NOREGALLOC: [sp]
|
||||
;REGALLOC: [sp]
|
||||
;REGALLOC: [sp, #12]
|
||||
tail call void (i8*, ...) @f(i8* getelementptr ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00)
|
||||
ret void
|
||||
}
|
||||
|
@ -124,10 +124,10 @@ define void @i56_and_or(i56* %a) {
|
||||
; BE-LABEL: i56_and_or:
|
||||
; BE: @ BB#0:
|
||||
; BE-NEXT: mov r1, r0
|
||||
; BE-NEXT: mov r3, #128
|
||||
; BE-NEXT: ldrh r2, [r1, #4]!
|
||||
; BE-NEXT: strb r3, [r1, #2]
|
||||
; BE-NEXT: ldr r12, [r0]
|
||||
; BE-NEXT: ldrh r2, [r1, #4]!
|
||||
; BE-NEXT: mov r3, #128
|
||||
; BE-NEXT: strb r3, [r1, #2]
|
||||
; BE-NEXT: lsl r2, r2, #8
|
||||
; BE-NEXT: orr r2, r2, r12, lsl #24
|
||||
; BE-NEXT: orr r2, r2, #384
|
||||
|
@ -118,17 +118,17 @@ define void @i56_or(i56* %a) {
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movzwl 4(%rdi), %eax
|
||||
; X64-NEXT: movzbl 6(%rdi), %ecx
|
||||
; X64-NEXT: movl (%rdi), %edx
|
||||
; X64-NEXT: movb %cl, 6(%rdi)
|
||||
; X64-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<kill> %RCX<def>
|
||||
; X64-NEXT: shll $16, %ecx
|
||||
; X64-NEXT: orl %eax, %ecx
|
||||
; X64-NEXT: shlq $32, %rcx
|
||||
; X64-NEXT: orq %rcx, %rdx
|
||||
; X64-NEXT: orq $384, %rdx # imm = 0x180
|
||||
; X64-NEXT: movl %edx, (%rdi)
|
||||
; X64-NEXT: shrq $32, %rdx
|
||||
; X64-NEXT: movw %dx, 4(%rdi)
|
||||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: orq %rcx, %rax
|
||||
; X64-NEXT: orq $384, %rax # imm = 0x180
|
||||
; X64-NEXT: movl %eax, (%rdi)
|
||||
; X64-NEXT: shrq $32, %rax
|
||||
; X64-NEXT: movw %ax, 4(%rdi)
|
||||
; X64-NEXT: retq
|
||||
%aa = load i56, i56* %a, align 1
|
||||
%b = or i56 %aa, 384
|
||||
@ -150,19 +150,19 @@ define void @i56_and_or(i56* %a) {
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movzwl 4(%rdi), %eax
|
||||
; X64-NEXT: movzbl 6(%rdi), %ecx
|
||||
; X64-NEXT: movl (%rdi), %edx
|
||||
; X64-NEXT: movb %cl, 6(%rdi)
|
||||
; X64-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<kill> %RCX<def>
|
||||
; X64-NEXT: shll $16, %ecx
|
||||
; X64-NEXT: orl %eax, %ecx
|
||||
; X64-NEXT: shlq $32, %rcx
|
||||
; X64-NEXT: orq %rcx, %rdx
|
||||
; X64-NEXT: orq $384, %rdx # imm = 0x180
|
||||
; X64-NEXT: movabsq $72057594037927808, %rax # imm = 0xFFFFFFFFFFFF80
|
||||
; X64-NEXT: andq %rdx, %rax
|
||||
; X64-NEXT: movl %eax, (%rdi)
|
||||
; X64-NEXT: shrq $32, %rax
|
||||
; X64-NEXT: movw %ax, 4(%rdi)
|
||||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: orq %rcx, %rax
|
||||
; X64-NEXT: orq $384, %rax # imm = 0x180
|
||||
; X64-NEXT: movabsq $72057594037927808, %rcx # imm = 0xFFFFFFFFFFFF80
|
||||
; X64-NEXT: andq %rax, %rcx
|
||||
; X64-NEXT: movl %ecx, (%rdi)
|
||||
; X64-NEXT: shrq $32, %rcx
|
||||
; X64-NEXT: movw %cx, 4(%rdi)
|
||||
; X64-NEXT: retq
|
||||
%b = load i56, i56* %a, align 1
|
||||
%c = and i56 %b, -128
|
||||
@ -188,20 +188,20 @@ define void @i56_insert_bit(i56* %a, i1 zeroext %bit) {
|
||||
; X64-NEXT: movzbl %sil, %eax
|
||||
; X64-NEXT: movzwl 4(%rdi), %ecx
|
||||
; X64-NEXT: movzbl 6(%rdi), %edx
|
||||
; X64-NEXT: movl (%rdi), %esi
|
||||
; X64-NEXT: movb %dl, 6(%rdi)
|
||||
; X64-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill> %RDX<def>
|
||||
; X64-NEXT: shll $16, %edx
|
||||
; X64-NEXT: orl %ecx, %edx
|
||||
; X64-NEXT: shlq $32, %rdx
|
||||
; X64-NEXT: orq %rdx, %rsi
|
||||
; X64-NEXT: movl (%rdi), %ecx
|
||||
; X64-NEXT: orq %rdx, %rcx
|
||||
; X64-NEXT: shlq $13, %rax
|
||||
; X64-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF
|
||||
; X64-NEXT: andq %rsi, %rcx
|
||||
; X64-NEXT: orq %rax, %rcx
|
||||
; X64-NEXT: movl %ecx, (%rdi)
|
||||
; X64-NEXT: shrq $32, %rcx
|
||||
; X64-NEXT: movw %cx, 4(%rdi)
|
||||
; X64-NEXT: movabsq $72057594037919743, %rdx # imm = 0xFFFFFFFFFFDFFF
|
||||
; X64-NEXT: andq %rcx, %rdx
|
||||
; X64-NEXT: orq %rax, %rdx
|
||||
; X64-NEXT: movl %edx, (%rdi)
|
||||
; X64-NEXT: shrq $32, %rdx
|
||||
; X64-NEXT: movw %dx, 4(%rdi)
|
||||
; X64-NEXT: retq
|
||||
%extbit = zext i1 %bit to i56
|
||||
%b = load i56, i56* %a, align 1
|
||||
|
@ -12,23 +12,23 @@
|
||||
define void @t1(i32 %argc, i8** %argv) nounwind {
|
||||
entry:
|
||||
; SSE2-Darwin-LABEL: t1:
|
||||
; SSE2-Darwin: movsd _.str+16, %xmm0
|
||||
; SSE2-Darwin: movsd %xmm0, 16(%esp)
|
||||
; SSE2-Darwin: movaps _.str, %xmm0
|
||||
; SSE2-Darwin: movaps %xmm0
|
||||
; SSE2-Darwin: movsd _.str+16, %xmm0
|
||||
; SSE2-Darwin: movsd %xmm0, 16(%esp)
|
||||
; SSE2-Darwin: movb $0, 24(%esp)
|
||||
|
||||
; SSE2-Mingw32-LABEL: t1:
|
||||
; SSE2-Mingw32: movsd _.str+16, %xmm0
|
||||
; SSE2-Mingw32: movsd %xmm0, 16(%esp)
|
||||
; SSE2-Mingw32: movaps _.str, %xmm0
|
||||
; SSE2-Mingw32: movups %xmm0
|
||||
; SSE2-Mingw32: movsd _.str+16, %xmm0
|
||||
; SSE2-Mingw32: movsd %xmm0, 16(%esp)
|
||||
; SSE2-Mingw32: movb $0, 24(%esp)
|
||||
|
||||
; SSE1-LABEL: t1:
|
||||
; SSE1: movaps _.str, %xmm0
|
||||
; SSE1: movaps %xmm0
|
||||
; SSE1: movb $0, 24(%esp)
|
||||
; SSE1: movaps %xmm0
|
||||
; SSE1: movl $0, 20(%esp)
|
||||
; SSE1: movl $0, 16(%esp)
|
||||
|
||||
|
@ -25,8 +25,8 @@ define i32 @pr34088() local_unnamed_addr {
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: movaps %xmm0, (%esp)
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: movaps %xmm1, (%esp)
|
||||
; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
|
||||
; CHECK-NEXT: movaps %xmm1, (%esp)
|
||||
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl %ebp, %esp
|
||||
; CHECK-NEXT: popl %ebp
|
||||
|
@ -349,8 +349,8 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
|
||||
; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
|
||||
; ATOM-NEXT: paddd %xmm2, %xmm0
|
||||
; ATOM-NEXT: paddd %xmm2, %xmm1
|
||||
; ATOM-NEXT: movq %xmm1, 16(%rsi)
|
||||
; ATOM-NEXT: movdqa %xmm0, (%rsi)
|
||||
; ATOM-NEXT: movq %xmm1, 16(%rsi)
|
||||
; ATOM-NEXT: retq
|
||||
; ATOM-NEXT: ## -- End function
|
||||
;
|
||||
|
@ -16,9 +16,9 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind {
|
||||
; CHECK-NEXT: movl {{\.LCPI.*}}, %eax
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_2: # %forbody
|
||||
|
Loading…
Reference in New Issue
Block a user