1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/X86/dagcombine-cse.ll
Craig Topper e18584075d Recommit r358887 "[TargetLowering][AMDGPU][X86] Improve SimplifyDemandedBits bitcast handling"
I've included a new fix in X86RegisterInfo to prevent PR41619 without
reintroducing r359392. We might be able to improve that in the base class
implementation of shouldRewriteCopySrc somehow. But this hopefully enables
forward progress on SimplifyDemandedBits improvements for now.

Original commit message:

This patch adds support for BigBitWidth -> SmallBitWidth bitcasts, splitting the DemandedBits/Elts accordingly.

The AMDGPU backend needed an extra  (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1) combine to encourage BFE creation, I investigated putting this in DAGComb
but it caused a lot of noise on other targets - some improvements, some regressions.

The X86 changes are all definite wins.

llvm-svn: 360552
2019-05-13 04:03:35 +00:00

48 lines
2.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
; X32-LABEL: t:
; X32: ## %bb.0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%eax,%ecx), %eax
; X32-NEXT: retl
;
; X64-LABEL: t:
; X64: ## %bb.0: ## %entry
; X64-NEXT: imull %ecx, %esi
; X64-NEXT: addl %edx, %esi
; X64-NEXT: movslq %esi, %rax
; X64-NEXT: movl (%rdi,%rax), %eax
; X64-NEXT: movq %rax, %xmm0
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: retq
entry:
%tmp7 = mul i32 %idxY, %ref_frame_stride ; <i32> [#uses=2]
%tmp9 = add i32 %tmp7, %idxX ; <i32> [#uses=1]
%tmp11 = getelementptr i8, i8* %ref_frame_ptr, i32 %tmp9 ; <i8*> [#uses=1]
%tmp1112 = bitcast i8* %tmp11 to i32* ; <i32*> [#uses=1]
%tmp13 = load i32, i32* %tmp1112, align 4 ; <i32> [#uses=1]
%tmp18 = add i32 %idxX, 4 ; <i32> [#uses=1]
%tmp20.sum = add i32 %tmp18, %tmp7 ; <i32> [#uses=1]
%tmp21 = getelementptr i8, i8* %ref_frame_ptr, i32 %tmp20.sum ; <i8*> [#uses=1]
%tmp2122 = bitcast i8* %tmp21 to i16* ; <i16*> [#uses=1]
%tmp23 = load i16, i16* %tmp2122, align 2 ; <i16> [#uses=1]
%tmp2425 = zext i16 %tmp23 to i64 ; <i64> [#uses=1]
%tmp26 = shl i64 %tmp2425, 32 ; <i64> [#uses=1]
%tmp2728 = zext i32 %tmp13 to i64 ; <i64> [#uses=1]
%tmp29 = or i64 %tmp26, %tmp2728 ; <i64> [#uses=1]
%tmp3454 = bitcast i64 %tmp29 to double ; <double> [#uses=1]
%tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0 ; <<2 x double>> [#uses=1]
%tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1 ; <<2 x double>> [#uses=1]
%tmp42 = bitcast <2 x double> %tmp36 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1]
%tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp48 = extractelement <4 x i32> %tmp47, i32 0 ; <i32> [#uses=1]
ret i32 %tmp48
}