mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
ace163be2a
getOperationCost() is not the cost we wanted; that's not the throughput value that the rest of the calculation uses. We may want to switch everything in this code to use the getInstructionThroughput() wrapper to avoid these kinds of problems, but I'll look at that as a follow-up because that can create other logical diffs via using optional parameters (we'd need to speculatively create the vector instruction to make a fair(er) comparison).
103 lines
3.8 KiB
LLVM
103 lines
3.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX

; Lane-by-lane equality test spread over a chain of basic blocks. Each block
; extracts the same lane from both bitcast vectors, compares the two scalars
; with icmp eq, and either continues to the next lane or exits through bb18.
;
; The assertions use the prefix shared by both run configurations: every
; scalar compare is expected to be rewritten as a <4 x i32> icmp followed by
; an extractelement of the matching i1 lane.
;
; Despite the "v4i32" in the name, the arguments are <4 x float>; they are
; bitcast to <4 x i32> once in the entry block and reused by later blocks.
define i1 @cmp_v4i32(<4 x float> %arg, <4 x float> %arg1) {
; CHECK-LABEL: @cmp_v4i32(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[T:%.*]] = bitcast <4 x float> [[ARG:%.*]] to <4 x i32>
; CHECK-NEXT:    [[T3:%.*]] = bitcast <4 x float> [[ARG1:%.*]] to <4 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[BB6:%.*]], label [[BB18:%.*]]
; CHECK:       bb6:
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; CHECK-NEXT:    br i1 [[TMP3]], label [[BB10:%.*]], label [[BB18]]
; CHECK:       bb10:
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
; CHECK-NEXT:    br i1 [[TMP5]], label [[BB14:%.*]], label [[BB18]]
; CHECK:       bb14:
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
; CHECK-NEXT:    br label [[BB18]]
; CHECK:       bb18:
; CHECK-NEXT:    [[T19:%.*]] = phi i1 [ false, [[BB10]] ], [ false, [[BB6]] ], [ false, [[BB:%.*]] ], [ [[TMP7]], [[BB14]] ]
; CHECK-NEXT:    ret i1 [[T19]]
;
bb:
  ; Lane 0. %t and %t3 are defined here and used again in bb6, bb10 and bb14.
  %t = bitcast <4 x float> %arg to <4 x i32>
  %t2 = extractelement <4 x i32> %t, i32 0
  %t3 = bitcast <4 x float> %arg1 to <4 x i32>
  %t4 = extractelement <4 x i32> %t3, i32 0
  %t5 = icmp eq i32 %t2, %t4
  br i1 %t5, label %bb6, label %bb18

bb6:
  ; Lane 1.
  %t7 = extractelement <4 x i32> %t, i32 1
  %t8 = extractelement <4 x i32> %t3, i32 1
  %t9 = icmp eq i32 %t7, %t8
  br i1 %t9, label %bb10, label %bb18

bb10:
  ; Lane 2.
  %t11 = extractelement <4 x i32> %t, i32 2
  %t12 = extractelement <4 x i32> %t3, i32 2
  %t13 = icmp eq i32 %t11, %t12
  br i1 %t13, label %bb14, label %bb18

bb14:
  ; Lane 3. The branch is unconditional; the compare result is consumed by
  ; the phi in bb18 instead of a conditional branch.
  %t15 = extractelement <4 x i32> %t, i32 3
  %t16 = extractelement <4 x i32> %t3, i32 3
  %t17 = icmp eq i32 %t15, %t16
  br label %bb18

bb18:
  ; Any early exit (from bb, bb6 or bb10) contributes false; only the path
  ; through bb14 contributes the lane-3 compare result.
  %t19 = phi i1 [ false, %bb10 ], [ false, %bb6 ], [ false, %bb ], [ %t17, %bb14 ]
  ret i1 %t19
}

; Two floating-point compares on lane 1 of <2 x double> vectors, in different
; basic blocks. The extracted %y1 is defined in the entry block and is an
; operand of both compares.
;
; Expectations differ between the two run configurations:
;   - with -mattr=SSE2 the scalar extractelement + fcmp sequence is expected
;     to be left unchanged;
;   - with -mattr=AVX2 each compare is expected as a <2 x double> fcmp
;     followed by an extractelement of lane 1 from the <2 x i1> result.
; The reason for the split is not stated in this file; presumably it comes
; from per-target cost differences (to be confirmed against the pass).
define i32 @cmp_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
; SSE-LABEL: @cmp_v2f64(
; SSE-NEXT:  entry:
; SSE-NEXT:    [[X1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
; SSE-NEXT:    [[Y1:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 1
; SSE-NEXT:    [[CMP1:%.*]] = fcmp oeq double [[X1]], [[Y1]]
; SSE-NEXT:    br i1 [[CMP1]], label [[T:%.*]], label [[F:%.*]]
; SSE:       t:
; SSE-NEXT:    [[Z1:%.*]] = extractelement <2 x double> [[Z:%.*]], i32 1
; SSE-NEXT:    [[CMP2:%.*]] = fcmp ogt double [[Y1]], [[Z1]]
; SSE-NEXT:    [[E:%.*]] = select i1 [[CMP2]], i32 42, i32 99
; SSE-NEXT:    ret i32 [[E]]
; SSE:       f:
; SSE-NEXT:    ret i32 0
;
; AVX-LABEL: @cmp_v2f64(
; AVX-NEXT:  entry:
; AVX-NEXT:    [[TMP0:%.*]] = fcmp oeq <2 x double> [[X:%.*]], [[Y:%.*]]
; AVX-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; AVX-NEXT:    br i1 [[TMP1]], label [[T:%.*]], label [[F:%.*]]
; AVX:       t:
; AVX-NEXT:    [[TMP2:%.*]] = fcmp ogt <2 x double> [[Y]], [[Z:%.*]]
; AVX-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; AVX-NEXT:    [[E:%.*]] = select i1 [[TMP3]], i32 42, i32 99
; AVX-NEXT:    ret i32 [[E]]
; AVX:       f:
; AVX-NEXT:    ret i32 0
;
entry:
  ; Ordered-equal compare of lane 1 of %x and %y.
  %x1 = extractelement <2 x double> %x, i32 1
  %y1 = extractelement <2 x double> %y, i32 1
  %cmp1 = fcmp oeq double %x1, %y1
  br i1 %cmp1, label %t, label %f

t:
  ; Ordered-greater-than compare that reuses %y1 from the entry block.
  %z1 = extractelement <2 x double> %z, i32 1
  %cmp2 = fcmp ogt double %y1, %z1
  %e = select i1 %cmp2, i32 42, i32 99
  ret i32 %e

f:
  ret i32 0
}