mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
c321ffbbcb
The IPM sequence currently generated to compute the strcmp/memcmp result will return INT_MIN for the "less than zero" case. While this is in compliance with the standard, strictly speaking, it turns out that common applications cannot handle this, e.g. because they negate a comparison result in order to implement reverse compares. This patch changes code to use a different sequence that will result in -2 for the "less than zero" case (same as GCC). However, this requires that the two source operands of the compare instructions are inverted, which breaks the optimization in removeIPMBasedCompare. Therefore, I've removed this (and all of optimizeCompareInstr), and replaced it with a mostly equivalent optimization in combineCCMask at the DAGcombine level. llvm-svn: 353304
222 lines
5.2 KiB
LLVM
222 lines
5.2 KiB
LLVM
; Test memcmp using CLC, with i32 results.
|
|
;
|
|
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
|
|
|
; External memcmp, called by the test functions in this file with constant
; sizes.  The i32 return value is declared signext.
declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size)
|
|
|
|
; Zero-length comparisons should be optimized away.
|
|
; f1: memcmp with a constant size of 0, result returned as an i32.
; Expected output: %r2 is loaded with 0 (lhi) and the function returns via
; %r14.
define i32 @f1(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f1:
; CHECK: lhi %r2, 0
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0)
  ret i32 %res
}
|
|
|
|
; Check a case where the result is used as an integer.
|
|
; f2: 2-byte memcmp whose i32 result is returned directly.
; Expected output: a single CLC of length 2 that names %r3 in its first
; operand and %r2 in its second, followed by IPM, SLL by 2 and SRA by 30 on
; %r2 to produce the integer result, then the return.
define i32 @f2(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f2:
; CHECK: clc 0(2,%r3), 0(%r2)
; CHECK: ipm %r2
; CHECK: sll %r2, 2
; CHECK: sra %r2, 30
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2)
  ret i32 %res
}
|
|
|
|
; Check a case where the result is tested for equality.
|
|
; f3: 3-byte memcmp whose result is only tested with `icmp eq` against zero.
; Expected output: the CLC is immediately followed by the conditional return
; `ber %r14` (nothing may appear between them); a plain `br %r14` is matched
; later.
define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f3:
; CHECK: clc 0(3,%r3), 0(%r2)
; CHECK-NEXT: ber %r14
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3)
  %cmp = icmp eq i32 %res, 0
  ; A zero result skips the store and goes straight to the exit block.
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}
|
|
|
|
; Check a case where the result is tested for inequality.
|
|
; f4: 4-byte memcmp whose result is only tested with `icmp ne` against zero.
; Expected output: the CLC is immediately followed by the conditional return
; `blhr %r14`; a plain `br %r14` is matched later.
define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f4:
; CHECK: clc 0(4,%r3), 0(%r2)
; CHECK-NEXT: blhr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4)
  %cmp = icmp ne i32 %res, 0
  ; A nonzero result skips the store and goes straight to the exit block.
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}
|
|
|
|
; Check a case where the result is tested via slt.
|
|
; f5: 5-byte memcmp whose result is only tested with `icmp slt` against zero.
; Expected output: the CLC (which names %r3 in its first operand and %r2 in
; its second) is immediately followed by the conditional return `bhr %r14`;
; a plain `br %r14` is matched later.
define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f5:
; CHECK: clc 0(5,%r3), 0(%r2)
; CHECK-NEXT: bhr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5)
  %cmp = icmp slt i32 %res, 0
  ; A negative result skips the store and goes straight to the exit block.
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}
|
|
|
|
; Check a case where the result is tested for sgt.
|
|
; f6: 6-byte memcmp whose result is only tested with `icmp sgt` against zero.
; Expected output: the CLC (which names %r3 in its first operand and %r2 in
; its second) is immediately followed by the conditional return `blr %r14`;
; a plain `br %r14` is matched later.
define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f6:
; CHECK: clc 0(6,%r3), 0(%r2)
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6)
  %cmp = icmp sgt i32 %res, 0
  ; A positive result skips the store and goes straight to the exit block.
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}
|
|
|
|
; Check the upper end of the CLC range. Here the result is used both as
|
|
; an integer and for branching.
|
|
; f7: 256-byte memcmp whose result is both returned and tested with
; `icmp slt` against zero.
; Expected output: one CLC of length 256, then IPM, SLL by 2 and SRA by 30 on
; %r2 to produce the integer result, then a conditional return `blr %r14`
; and, later, a plain `br %r14`.
define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f7:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: ipm %r2
; CHECK: sll %r2, 2
; CHECK: sra %r2, 30
; CHECK: blr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256)
  %cmp = icmp slt i32 %res, 0
  ; A negative result skips the store and goes straight to the exit block.
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ; The memcmp result itself is returned on both paths.
  ret i32 %res
}
|
|
|
|
; 257 bytes needs two CLCs.
|
|
; f8: 257-byte memcmp whose i32 result is returned directly.
; Expected output: a CLC of the first 256 bytes, a `jlh` to a label, a CLC of
; the remaining 1 byte at offset 256, then that label, followed by an IPM
; into one of %r0-%r5 and the return.
define i32 @f8(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f8:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(1,%r3), 256(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
  ret i32 %res
}
|
|
|
|
; Test a comparison of 257 bytes in which the CC result can be used directly.
|
|
; f9: 257-byte memcmp whose result is only tested with `icmp slt` against
; zero.
; Expected output: a CLC of the first 256 bytes, a `jlh` to a label, a CLC of
; the remaining 1 byte at offset 256, then that label immediately followed by
; the conditional return `bhr %r14`; a plain `br %r14` is matched later.
define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f9:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(1,%r3), 256(%r2)
; CHECK: [[LABEL]]:
; CHECK-NEXT: bhr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
  %cmp = icmp slt i32 %res, 0
  ; A negative result skips the store and goes straight to the exit block.
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}
|
|
|
|
; Test the largest size that can use two CLCs.
|
|
; f10: 512-byte memcmp whose i32 result is returned directly.
; Expected output: a CLC of the first 256 bytes, a `jlh` to a label, a CLC of
; the second 256 bytes at offset 256, then that label, followed by an IPM
; into one of %r0-%r5 and the return.
define i32 @f10(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f10:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r3), 256(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 512)
  ret i32 %res
}
|
|
|
|
; Test the smallest size that needs 3 CLCs.
|
|
; f11: 513-byte memcmp whose i32 result is returned directly.
; Expected output: three CLCs (256 bytes at offset 0, 256 bytes at offset 256,
; 1 byte at offset 512); each of the first two is followed by a `jlh` to the
; same label, which is placed after the third CLC and followed by an IPM into
; one of %r0-%r5 and the return.
define i32 @f11(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f11:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r3), 256(%r2)
; CHECK: jlh [[LABEL]]
; CHECK: clc 512(1,%r3), 512(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 513)
  ret i32 %res
}
|
|
|
|
; Test the largest size that can use 3 CLCs.
|
|
; f12: 768-byte memcmp whose i32 result is returned directly.
; Expected output: three CLCs of 256 bytes each (offsets 0, 256 and 512);
; each of the first two is followed by a `jlh` to the same label, which is
; placed after the third CLC and followed by an IPM into one of %r0-%r5 and
; the return.
define i32 @f12(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f12:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r3), 256(%r2)
; CHECK: jlh [[LABEL]]
; CHECK: clc 512(256,%r3), 512(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 768)
  ret i32 %res
}
|
|
|
|
; The next size up uses a loop instead. We leave the more complicated
|
|
; loop tests to memcpy-01.ll, which shares the same form.
|
|
; f13: 769-byte memcmp whose i32 result is returned directly.
; Expected output: a counter register (one of %r0-%r5) is loaded with 3, then
; a loop compares 256 bytes per iteration with CLC, leaves via `jlh` to a
; label, advances both %r2 and %r3 by 256 (in either order) and repeats via
; BRCTG on the counter.  After the loop a 1-byte CLC handles the remainder,
; followed by the `jlh` target label, an IPM into one of %r0-%r5 and the
; return.
; The loop-label capture escapes the leading dot so that it only matches a
; literal ".L" prefix, in line with the other label captures in this file.
define i32 @f13(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f13:
; CHECK: lghi [[COUNT:%r[0-5]]], 3
; CHECK: [[LOOP:\.L[^:]*]]:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK-DAG: la %r2, 256(%r2)
; CHECK-DAG: la %r3, 256(%r3)
; CHECK: brctg [[COUNT]], [[LOOP]]
; CHECK: clc 0(1,%r3), 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)
  ret i32 %res
}
|