mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
b7845c5bbc
Summary: This patch adds a special DAG combine for SSE1 to recognize the IR pattern InstCombine gives us for movmsk. This only does the recognition for a few cases where it's obvious the input won't be scalarized, resulting in building a vector just to do the movmsk. I've made it separate from our existing matching for movmsk since that's called in multiple places and I didn't spend time to see if the other callers would make sense here. Plus the restrictions and additional checks would complicate that. This fixes the case from PR42870. But it's probably still broken in the presence of logic ops feeding the movmsk pattern, which would further hide the v4f32 type. Reviewers: spatel, RKSimon, xbolva00 Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65689 llvm-svn: 368506
32 lines
918 B
LLVM
32 lines
918 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=sse | FileCheck %s

; foo: sign-bit mask of a <4 x float> loaded from memory.
;
; Loads 16 bytes through %a (16-byte aligned), reinterprets them as
; <4 x i32>, tests every lane for "signed less than zero", packs the four
; i1 results into an i4 and zero-extends that to the i32 return value.
;
; Expected codegen with only SSE1 available: fetch the pointer argument
; from the stack, one aligned vector load, and a single movmskps. The
; vector must not be scalarised just to build the mask.
define i32 @foo(<4 x float>* %a) {
; CHECK-LABEL: foo:
; CHECK: ## %bb.0: ## %start
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movaps (%eax), %xmm0
; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: retl
start:
  ; View the float vector as integers so the sign test is an integer compare.
  %0 = bitcast <4 x float>* %a to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 16
  ; A lane is true exactly when it is negative as a signed i32.
  %2 = icmp slt <4 x i32> %1, zeroinitializer
  ; Pack the four lane results into one 4-bit integer, then widen it.
  %3 = bitcast <4 x i1> %2 to i4
  %4 = zext i4 %3 to i32
  ret i32 %4
}
|
|
|
|
; bar: sign-bit mask of a <4 x float> passed by value.
;
; Same compare / pack / zero-extend sequence as the in-memory variant of
; this test, but the vector arrives as an argument, so no load is involved.
;
; Expected codegen with only SSE1 available: a single movmskps on the
; incoming %xmm0 followed by the return.
define i32 @bar(<4 x float> %a) {
; CHECK-LABEL: bar:
; CHECK: ## %bb.0: ## %start
; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: retl
start:
  ; View the float vector as integers so the sign test is an integer compare.
  %0 = bitcast <4 x float> %a to <4 x i32>
  ; A lane is true exactly when it is negative as a signed i32.
  %1 = icmp slt <4 x i32> %0, zeroinitializer
  ; Pack the four lane results into one 4-bit integer, then widen it.
  %2 = bitcast <4 x i1> %1 to i4
  %3 = zext i4 %2 to i32
  ret i32 %3
}
|