diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index a2d3126a912..8bf1eb9e33d 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1663,7 +1663,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, bool UseTBAA) { const MachineFunction *MF = getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - const MachineFrameInfo &MFI = MF->getFrameInfo(); // If neither instruction stores to memory, they can't alias in any // meaningful way, even if they read from the same address. @@ -1674,6 +1673,9 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA)) return false; + if (!AA) + return true; + // FIXME: Need to handle multiple memory operands to support all targets. if (!hasOneMemOperand() || !Other.hasOneMemOperand()) return true; @@ -1681,6 +1683,9 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, MachineMemOperand *MMOa = *memoperands_begin(); MachineMemOperand *MMOb = *Other.memoperands_begin(); + if (!MMOa->getValue() || !MMOb->getValue()) + return true; + // The following interface to AA is fashioned after DAGCombiner::isAlias // and operates with MachineMemOperand offset with some important // assumptions: @@ -1693,52 +1698,22 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, // - There should never be any negative offsets here. // // FIXME: Modify API to hide this math from "user" - // Even before we go to AA we can reason locally about some + // FIXME: Even before we go to AA we can reason locally about some // memory objects. It can save compile time, and possibly catch some // corner cases not currently covered. - int64_t OffsetA = MMOa->getOffset(); - int64_t OffsetB = MMOb->getOffset(); + assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); + assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); - assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); - assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); + int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); + int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; + int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; - int64_t MinOffset = std::min(OffsetA, OffsetB); - int64_t WidthA = MMOa->getSize(); - int64_t WidthB = MMOb->getSize(); - const Value *ValA = MMOa->getValue(); - const Value *ValB = MMOb->getValue(); - bool SameVal = (ValA && ValB && (ValA == ValB)); - if (!SameVal) { - const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); - const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); - if (PSVa && PSVa->isConstant(&MFI)) - return false; - if (PSVb && PSVb->isConstant(&MFI)) - return false; - if (PSVa && PSVb && (PSVa == PSVb)) - SameVal = true; - } - - if (SameVal) { - int64_t MaxOffset = std::max(OffsetA, OffsetB); - int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB; - return (MinOffset + LowWidth > MaxOffset); - } - - if (!AA) - return true; - - if (!ValA || !ValB) - return true; - - int64_t Overlapa = WidthA + OffsetA - MinOffset; - int64_t Overlapb = WidthB + OffsetB - MinOffset; - - AliasResult AAResult = AA->alias( - MemoryLocation(ValA, Overlapa, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), - MemoryLocation(ValB, Overlapb, - UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); + AliasResult AAResult = + AA->alias(MemoryLocation(MMOa->getValue(), Overlapa, + UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(MMOb->getValue(), Overlapb, + UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); return (AAResult != NoAlias); } diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 9307b6a3e47..975e5ae8b95 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -1531,7 +1531,7 @@ define void @merge_zr64_unalign(<2 x i64>* %p) { ; CHECK-LABEL: merge_zr64_unalign: ; CHECK: // %entry ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] -; STRICTALIGN: strb +; STRICTALIGN: strb wzr, ; STRICTALIGN: strb ; STRICTALIGN: strb ; STRICTALIGN: strb diff --git a/test/CodeGen/AMDGPU/call-argument-types.ll b/test/CodeGen/AMDGPU/call-argument-types.ll index 740a74a9d40..589b333e608 100644 --- a/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/test/CodeGen/AMDGPU/call-argument-types.ll @@ -452,15 +452,15 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8 ; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12 -; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4 ; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8 +; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4 ; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], s33 offset:8 ; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], s33 offset:12 -; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4 ; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8 +; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4 ; GCN-NEXT: s_swappc_b64 ; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200 @@ -487,8 +487,8 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12 ; GCN-DAG: s_add_u32 [[SP]], [[SP]], 0x200 -; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4 ; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8 +; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4 ; GCN-NEXT: s_swappc_b64 ; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16 ; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20 diff --git a/test/CodeGen/AMDGPU/load-global-i16.ll b/test/CodeGen/AMDGPU/load-global-i16.ll index 6d243340395..cb2495d5fdc 100644 --- a/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/test/CodeGen/AMDGPU/load-global-i16.ll @@ -179,8 +179,8 @@ define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1) ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1 @@ -188,6 +188,8 @@ define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1) ; TODO: This should use DST, but for some there are redundant MOVs ; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal ; EGCM: 16 +; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal +; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in @@ -200,8 +202,8 @@ entry: ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 0, #1 diff --git a/test/CodeGen/AMDGPU/load-global-i8.ll b/test/CodeGen/AMDGPU/load-global-i8.ll index d7ebd46bc3e..3fe6bd26be1 100644 --- a/test/CodeGen/AMDGPU/load-global-i8.ll +++ b/test/CodeGen/AMDGPU/load-global-i8.ll @@ -352,22 +352,22 @@ define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace( ; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1 ; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal ; EG-DAG: 8 ; EG-DAG: 8 ; EG-DAG: 8 diff --git a/test/CodeGen/AMDGPU/load-local-i16.ll b/test/CodeGen/AMDGPU/load-local-i16.ll index 875af807ad4..7de3f3b28c6 100644 --- a/test/CodeGen/AMDGPU/load-local-i16.ll +++ b/test/CodeGen/AMDGPU/load-local-i16.ll @@ -530,6 +530,7 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace( ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y ; EG-DAG: LDS_WRITE +; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { %a = load i16, i16 addrspace(3)* %in %ext = zext i16 %a to i64 @@ -571,6 +572,7 @@ define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y ; EG-DAG: LDS_WRITE +; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { %load = load <1 x i16>, <1 x i16> addrspace(3)* %in %ext = zext <1 x i16> %load to <1 x i64> diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll index 98a89a07af6..39f3292e260 100644 --- a/test/CodeGen/ARM/2009-10-27-double-align.ll +++ b/test/CodeGen/ARM/2009-10-27-double-align.ll @@ -1,15 +1,13 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s --check-prefix=NOREGALLOC -; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s --check-prefix=REGALLOC +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s @.str = private constant [1 x i8] zeroinitializer, align 1 define void @g() { entry: ;CHECK: [sp, #8] -;NOREGALLOC: [sp, #12] -;NOREGALLOC: [sp] -;REGALLOC: [sp] -;REGALLOC: [sp, #12] +;CHECK: [sp, #12] +;CHECK: [sp] tail call void (i8*, ...) @f(i8* getelementptr ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00) ret void } diff --git a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll index 6d62fd31f97..a633c0291c6 100644 --- a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -124,10 +124,10 @@ define void @i56_and_or(i56* %a) { ; BE-LABEL: i56_and_or: ; BE: @ BB#0: ; BE-NEXT: mov r1, r0 -; BE-NEXT: ldr r12, [r0] -; BE-NEXT: ldrh r2, [r1, #4]! ; BE-NEXT: mov r3, #128 +; BE-NEXT: ldrh r2, [r1, #4]! ; BE-NEXT: strb r3, [r1, #2] +; BE-NEXT: ldr r12, [r0] ; BE-NEXT: lsl r2, r2, #8 ; BE-NEXT: orr r2, r2, r12, lsl #24 ; BE-NEXT: orr r2, r2, #384 diff --git a/test/CodeGen/X86/illegal-bitfield-loadstore.ll b/test/CodeGen/X86/illegal-bitfield-loadstore.ll index 8059e4acbb1..5425670fbb1 100644 --- a/test/CodeGen/X86/illegal-bitfield-loadstore.ll +++ b/test/CodeGen/X86/illegal-bitfield-loadstore.ll @@ -118,17 +118,17 @@ define void @i56_or(i56* %a) { ; X64: # BB#0: ; X64-NEXT: movzwl 4(%rdi), %eax ; X64-NEXT: movzbl 6(%rdi), %ecx +; X64-NEXT: movl (%rdi), %edx ; X64-NEXT: movb %cl, 6(%rdi) ; X64-NEXT: # kill: %ECX %ECX %RCX %RCX ; X64-NEXT: shll $16, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: shlq $32, %rcx -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: orq %rcx, %rax -; X64-NEXT: orq $384, %rax # imm = 0x180 -; X64-NEXT: movl %eax, (%rdi) -; X64-NEXT: shrq $32, %rax -; X64-NEXT: movw %ax, 4(%rdi) +; X64-NEXT: orq %rcx, %rdx +; X64-NEXT: orq $384, %rdx # imm = 0x180 +; X64-NEXT: movl %edx, (%rdi) +; X64-NEXT: shrq $32, %rdx +; X64-NEXT: movw %dx, 4(%rdi) ; X64-NEXT: retq %aa = load i56, i56* %a, align 1 %b = or i56 %aa, 384 @@ -150,19 +150,19 @@ define void @i56_and_or(i56* %a) { ; X64: # BB#0: ; X64-NEXT: movzwl 4(%rdi), %eax ; X64-NEXT: movzbl 6(%rdi), %ecx +; X64-NEXT: movl (%rdi), %edx ; X64-NEXT: movb %cl, 6(%rdi) ; X64-NEXT: # kill: %ECX %ECX %RCX %RCX ; X64-NEXT: shll $16, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: shlq $32, %rcx -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: orq %rcx, %rax -; X64-NEXT: orq $384, %rax # imm = 0x180 -; X64-NEXT: movabsq $72057594037927808, %rcx # imm = 0xFFFFFFFFFFFF80 -; X64-NEXT: andq %rax, %rcx -; X64-NEXT: movl %ecx, (%rdi) -; X64-NEXT: shrq $32, %rcx -; X64-NEXT: movw %cx, 4(%rdi) +; X64-NEXT: orq %rcx, %rdx +; X64-NEXT: orq $384, %rdx # imm = 0x180 +; X64-NEXT: movabsq $72057594037927808, %rax # imm = 0xFFFFFFFFFFFF80 +; X64-NEXT: andq %rdx, %rax +; X64-NEXT: movl %eax, (%rdi) +; X64-NEXT: shrq $32, %rax +; X64-NEXT: movw %ax, 4(%rdi) ; X64-NEXT: retq %b = load i56, i56* %a, align 1 %c = and i56 %b, -128 @@ -188,20 +188,20 @@ define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { ; X64-NEXT: movzbl %sil, %eax ; X64-NEXT: movzwl 4(%rdi), %ecx ; X64-NEXT: movzbl 6(%rdi), %edx +; X64-NEXT: movl (%rdi), %esi ; X64-NEXT: movb %dl, 6(%rdi) ; X64-NEXT: # kill: %EDX %EDX %RDX %RDX ; X64-NEXT: shll $16, %edx ; X64-NEXT: orl %ecx, %edx ; X64-NEXT: shlq $32, %rdx -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: orq %rdx, %rcx +; X64-NEXT: orq %rdx, %rsi ; X64-NEXT: shlq $13, %rax -; X64-NEXT: movabsq $72057594037919743, %rdx # imm = 0xFFFFFFFFFFDFFF -; X64-NEXT: andq %rcx, %rdx -; X64-NEXT: orq %rax, %rdx -; X64-NEXT: movl %edx, (%rdi) -; X64-NEXT: shrq $32, %rdx -; X64-NEXT: movw %dx, 4(%rdi) +; X64-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF +; X64-NEXT: andq %rsi, %rcx +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: movl %ecx, (%rdi) +; X64-NEXT: shrq $32, %rcx +; X64-NEXT: movw %cx, 4(%rdi) ; X64-NEXT: retq %extbit = zext i1 %bit to i56 %b = load i56, i56* %a, align 1 diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll index bd8f6e91fa3..7ef61c9a677 100644 --- a/test/CodeGen/X86/memcpy-2.ll +++ b/test/CodeGen/X86/memcpy-2.ll @@ -12,23 +12,23 @@ define void @t1(i32 %argc, i8** %argv) nounwind { entry: ; SSE2-Darwin-LABEL: t1: -; SSE2-Darwin: movaps _.str, %xmm0 -; SSE2-Darwin: movaps %xmm0 ; SSE2-Darwin: movsd _.str+16, %xmm0 ; SSE2-Darwin: movsd %xmm0, 16(%esp) +; SSE2-Darwin: movaps _.str, %xmm0 +; SSE2-Darwin: movaps %xmm0 ; SSE2-Darwin: movb $0, 24(%esp) ; SSE2-Mingw32-LABEL: t1: -; SSE2-Mingw32: movaps _.str, %xmm0 -; SSE2-Mingw32: movups %xmm0 ; SSE2-Mingw32: movsd _.str+16, %xmm0 ; SSE2-Mingw32: movsd %xmm0, 16(%esp) +; SSE2-Mingw32: movaps _.str, %xmm0 +; SSE2-Mingw32: movups %xmm0 ; SSE2-Mingw32: movb $0, 24(%esp) ; SSE1-LABEL: t1: ; SSE1: movaps _.str, %xmm0 -; SSE1: movb $0, 24(%esp) ; SSE1: movaps %xmm0 +; SSE1: movb $0, 24(%esp) ; SSE1: movl $0, 20(%esp) ; SSE1: movl $0, 16(%esp) diff --git a/test/CodeGen/X86/pr34088.ll b/test/CodeGen/X86/pr34088.ll index 259c7355339..d3667e3884d 100644 --- a/test/CodeGen/X86/pr34088.ll +++ b/test/CodeGen/X86/pr34088.ll @@ -25,8 +25,8 @@ define i32 @pr34088() local_unnamed_addr { ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movaps %xmm0, (%esp) ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD ; CHECK-NEXT: movaps %xmm1, (%esp) +; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll index d53e8285922..e363a82a2b9 100644 --- a/test/CodeGen/X86/widen_arith-3.ll +++ b/test/CodeGen/X86/widen_arith-3.ll @@ -16,9 +16,9 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind { ; CHECK-NEXT: movl {{\.LCPI.*}}, %eax ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_2: # %forbody