1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00
llvm-mirror/test/CodeGen/X86/pr42870.ll
Craig Topper b7845c5bbc [X86] Match the IR pattern for movmsk on SSE1 only targets where v4i32 isn't legal
Summary:
This patch adds a special DAG combine for SSE1 to recognize the IR pattern InstCombine gives us for movmsk. This only does the recognition for a few cases where it's obvious the input won't be scalarized, which would result in building a vector just to do the movmsk. I've made it separate from our existing matching for movmsk since that's called in multiple places and I didn't spend time to see if the other callers would make sense here. Plus the restrictions and additional checks would complicate that.

This fixes the case from PR42870. But it's probably still broken in the presence of logic ops feeding the movmsk pattern, which would further hide the v4f32 type.

Reviewers: spatel, RKSimon, xbolva00

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65689

llvm-svn: 368506
2019-08-10 07:51:13 +00:00

32 lines
918 B
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=sse | FileCheck %s
; foo: loads four i32 lanes through the <4 x float> pointer argument and packs
; the per-lane "less than zero" results into the low 4 bits of the i32 result.
; The CHECK lines expect the load plus a single movmskps.
define i32 @foo(<4 x float>* %a) {
; CHECK-LABEL: foo:
; CHECK: ## %bb.0: ## %start
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movaps (%eax), %xmm0
; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: retl
start:
  ; Reinterpret the float-vector pointer so the lanes are loaded as integers.
  %int.ptr = bitcast <4 x float>* %a to <4 x i32>*
  %lanes = load <4 x i32>, <4 x i32>* %int.ptr, align 16
  ; A signed compare against zero is true exactly when the lane's sign bit is set.
  %is.neg = icmp slt <4 x i32> %lanes, zeroinitializer
  ; Collapse the four i1 results into one 4-bit mask, then widen to i32.
  %mask4 = bitcast <4 x i1> %is.neg to i4
  %mask32 = zext i4 %mask4 to i32
  ret i32 %mask32
}
; bar: same sign-bit mask pattern as a load-based variant would use, but the
; <4 x float> operand is passed by value. The CHECK lines expect a lone
; movmskps on %xmm0.
define i32 @bar(<4 x float> %a) {
; CHECK-LABEL: bar:
; CHECK: ## %bb.0: ## %start
; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: retl
start:
  ; View the float lanes as integers so the sign bit can be tested with icmp.
  %lanes = bitcast <4 x float> %a to <4 x i32>
  ; A signed compare against zero is true exactly when the lane's sign bit is set.
  %is.neg = icmp slt <4 x i32> %lanes, zeroinitializer
  ; Collapse the four i1 results into one 4-bit mask, then widen to i32.
  %mask4 = bitcast <4 x i1> %is.neg to i4
  %mask32 = zext i4 %mask4 to i32
  ret i32 %mask32
}