From 69b881344b3a240a9e7aab800fa2988e83826be5 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Wed, 4 Feb 2015 18:54:01 +0000 Subject: [PATCH] Fixes a bug in vector load legalization that confused bits and bytes. Differential Revision: http://reviews.llvm.org/D7400 llvm-svn: 228168 --- .../SelectionDAG/LegalizeVectorOps.cpp | 6 +- test/CodeGen/X86/pr15267.ll | 75 ++++++++++++++++++- 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index eac404c5036..11e6b38f076 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -554,9 +554,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { BitOffset += SrcEltBits; if (BitOffset >= WideBits) { WideIdx++; - Offset -= WideBits; - if (Offset > 0) { - ShAmt = DAG.getConstant(SrcEltBits - Offset, + BitOffset -= WideBits; + if (BitOffset > 0) { + ShAmt = DAG.getConstant(SrcEltBits - BitOffset, TLI.getShiftAmountTy(WideVT)); Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); diff --git a/test/CodeGen/X86/pr15267.ll b/test/CodeGen/X86/pr15267.ll index b4dc5fd4716..90df9905fe1 100644 --- a/test/CodeGen/X86/pr15267.ll +++ b/test/CodeGen/X86/pr15267.ll @@ -4,8 +4,7 @@ define <4 x i3> @test1(<4 x i3>* %in) nounwind { %ret = load <4 x i3>* %in, align 1 ret <4 x i3> %ret } - -; CHECK: test1 +; CHECK-LABEL: test1 ; CHECK: movzwl ; CHECK: shrl $3 ; CHECK: andl $7 @@ -25,7 +24,7 @@ define <4 x i1> @test2(<4 x i1>* %in) nounwind { ret <4 x i1> %ret } -; CHECK: test2 +; CHECK-LABEL: test2 ; CHECK: movzbl ; CHECK: shrl ; CHECK: andl $1 @@ -46,7 +45,7 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind { ret <4 x i64> %sext } -; CHECK: test3 +; CHECK-LABEL: test3 ; CHECK: movzbl ; CHECK: movq ; CHECK: shlq @@ -67,3 +66,71 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind { ; CHECK: vpunpcklqdq ; CHECK: vinsertf128 ; CHECK: ret + +define <16 x i4> @test4(<16 x i4>* %in) nounwind { + %ret = load <16 x i4>* %in, align 1 + ret <16 x i4> %ret +} + +; CHECK-LABEL: test4 +; CHECK: movl +; CHECK-NEXT: shrl +; CHECK-NEXT: andl +; CHECK-NEXT: movl +; CHECK-NEXT: andl +; CHECK-NEXT: vmovd +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movl +; CHECK-NEXT: shrl +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movl +; CHECK-NEXT: shrl +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movl +; CHECK-NEXT: shrl +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movl +; CHECK-NEXT: shrl +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movl +; CHECK-NEXT: shrl +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movl +; CHECK-NEXT: shrl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movq +; CHECK-NEXT: shrq +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movq +; CHECK-NEXT: shrq +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movq +; CHECK-NEXT: shrq +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movq +; CHECK-NEXT: shrq +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movq +; CHECK-NEXT: shrq +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movq +; CHECK-NEXT: shrq +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: movq +; CHECK-NEXT: shrq +; CHECK-NEXT: andl +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: shrq +; CHECK-NEXT: vpinsrb +; CHECK-NEXT: retq