1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

Allow matching extend-from-memory with strict FP nodes

This implements a small enhancement to https://reviews.llvm.org/D55506

Specifically, while we were able to match strict FP nodes for
floating-point extend operations with a register as source, this
did not work for operations with memory as source.

That is because for regular operations, this is represented as
a combined "extload" node (which is a variant of a load SD node);
but there is no equivalent using a strict FP operation.

However, it turns out that even in the absence of an extload
node, we can still just match the operations explicitly, e.g.
   (strict_fpextend (f32 (load node:$ptr)))

This patch implements that method to match the LDEB/LXEB/LXDB
SystemZ instructions even when the extend uses a strict-FP node.

llvm-svn: 364450
This commit is contained in:
Ulrich Weigand 2019-06-26 17:19:12 +00:00
parent a29b8a366a
commit 8564d2aa2a
6 changed files with 227 additions and 55 deletions

View File

@ -1224,6 +1224,13 @@ def setle : PatFrag<(ops node:$lhs, node:$rhs),
def setne : PatFrag<(ops node:$lhs, node:$rhs),
(setcc node:$lhs, node:$rhs, SETNE)>;
// We don't have strict FP extended loads as single DAG nodes, but we can
// still provide convenience fragments to match those operations.
// Strict-FP analogue of extloadf32: a strict fpextend whose source is a
// plain f32 load. There is no dedicated strict "extload" SD node, so the
// combination is matched explicitly instead.
def strict_extloadf32 : PatFrag<(ops node:$ptr),
(strict_fpextend (f32 (load node:$ptr)))>;
// Strict-FP analogue of extloadf64: strict fpextend of an f64 load.
def strict_extloadf64 : PatFrag<(ops node:$ptr),
(strict_fpextend (f64 (load node:$ptr)))>;
// Convenience fragments to match both strict and non-strict fp operations
def any_fadd : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fadd node:$lhs, node:$rhs),
@ -1291,6 +1298,12 @@ def any_fpround : PatFrags<(ops node:$src),
// Matches a floating-point extend in either its strict or non-strict form.
def any_fpextend : PatFrags<(ops node:$src),
[(strict_fpextend node:$src),
(fpextend node:$src)]>;
// Matches an f32 extending load in either strict or non-strict form, so
// one instruction pattern covers both kinds of nodes.
def any_extloadf32 : PatFrags<(ops node:$ptr),
[(strict_extloadf32 node:$ptr),
(extloadf32 node:$ptr)]>;
// Same for f64 extending loads.
def any_extloadf64 : PatFrags<(ops node:$ptr),
[(strict_extloadf64 node:$ptr),
(extloadf64 node:$ptr)]>;
multiclass binary_atomic_op_ord<SDNode atomic_op> {
def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),

View File

@ -208,14 +208,14 @@ let Predicates = [FeatureNoVectorEnhancements1] in {
// Extend memory floating-point values to wider representations.
let Uses = [FPC], mayRaiseFPException = 1 in {
def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
def LDEB : UnaryRXE<"ldeb", 0xED04, any_extloadf32, FP64, 4>;
def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
}
let Predicates = [FeatureNoVectorEnhancements1] in {
def : Pat<(f128 (extloadf32 bdxaddr12only:$src)),
def : Pat<(f128 (any_extloadf32 bdxaddr12only:$src)),
(LXEB bdxaddr12only:$src)>;
def : Pat<(f128 (extloadf64 bdxaddr12only:$src)),
def : Pat<(f128 (any_extloadf64 bdxaddr12only:$src)),
(LXDB bdxaddr12only:$src)>;
}

View File

@ -1,9 +1,6 @@
; Test strict extensions of f32 to f64.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
@ -17,13 +14,10 @@ define double @f1(float %val) {
ret double %res
}
; Check extension from memory.
; FIXME: This should really use LDEB, but there is no strict "extload" yet.
; Check the low end of the LDEB range.
define double @f2(float *%ptr) {
; CHECK-LABEL: f2:
; CHECK-SCALAR: le %f0, 0(%r2)
; CHECK-VECTOR: lde %f0, 0(%r2)
; CHECK: ldebr %f0, %f0
; CHECK: ldeb %f0, 0(%r2)
; CHECK: br %r14
%val = load float, float *%ptr
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
@ -31,3 +25,56 @@ define double @f2(float *%ptr) {
ret double %res
}
; Check the high end of the aligned LDEB range.
define double @f3(float *%base) {
; CHECK-LABEL: f3:
; CHECK: ldeb %f0, 4092(%r2)
; CHECK: br %r14
; Index 1023 * 4 bytes/float = displacement 4092, which still folds
; directly into the LDEB addressing mode.
%ptr = getelementptr float, float *%base, i64 1023
%val = load float, float *%ptr
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
metadata !"fpexcept.strict")
ret double %res
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define double @f4(float *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: ldeb %f0, 0(%r2)
; CHECK: br %r14
; Offset 4096 no longer fits the displacement, so the base register is
; adjusted first (other equivalent sequences would also be acceptable).
%ptr = getelementptr float, float *%base, i64 1024
%val = load float, float *%ptr
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
metadata !"fpexcept.strict")
ret double %res
}
; Check negative displacements, which also need separate address logic.
define double @f5(float *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: ldeb %f0, 0(%r2)
; CHECK: br %r14
; Negative offset (-1 float = -4 bytes) is handled by adjusting the base.
%ptr = getelementptr float, float *%base, i64 -1
%val = load float, float *%ptr
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
metadata !"fpexcept.strict")
ret double %res
}
; Check that LDEB allows indices.
define double @f6(float *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: ldeb %f0, 400(%r1,%r2)
; CHECK: br %r14
; The variable index is scaled to bytes (shift left 2) and used as an
; index register, with the constant 100 floats folded as displacement 400.
%ptr1 = getelementptr float, float *%base, i64 %index
%ptr2 = getelementptr float, float *%ptr1, i64 100
%val = load float, float *%ptr2
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
metadata !"fpexcept.strict")
ret double %res
}

View File

@ -17,12 +17,10 @@ define void @f1(fp128 *%dst, float %val) {
ret void
}
; Check extension from memory.
; FIXME: This should really use LXEB, but there is no strict "extload" yet.
; Check the low end of the LXEB range.
define void @f2(fp128 *%dst, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: le %f0, 0(%r3)
; CHECK: lxebr %f0, %f0
; CHECK: lxeb %f0, 0(%r3)
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
@ -33,3 +31,68 @@ define void @f2(fp128 *%dst, float *%ptr) {
ret void
}
; Check the high end of the aligned LXEB range.
define void @f3(fp128 *%dst, float *%base) {
; CHECK-LABEL: f3:
; CHECK: lxeb %f0, 4092(%r3)
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
; Index 1023 * 4 bytes/float = displacement 4092, still foldable into LXEB.
%ptr = getelementptr float, float *%base, i64 1023
%val = load float, float *%ptr
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f4(fp128 *%dst, float *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r3, 4096
; CHECK: lxeb %f0, 0(%r3)
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
; Offset 4096 does not fit the displacement, so the base is adjusted first.
%ptr = getelementptr float, float *%base, i64 1024
%val = load float, float *%ptr
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check negative displacements, which also need separate address logic.
define void @f5(fp128 *%dst, float *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r3, -4
; CHECK: lxeb %f0, 0(%r3)
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
; Negative offset (-1 float = -4 bytes) handled by adjusting the base.
%ptr = getelementptr float, float *%base, i64 -1
%val = load float, float *%ptr
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check that LXEB allows indices.
define void @f6(fp128 *%dst, float *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r4, 2
; CHECK: lxeb %f0, 400(%r1,%r3)
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
; Variable index scaled to bytes (shift left 2) becomes the index register;
; the constant 100 floats folds as displacement 400.
%ptr1 = getelementptr float, float *%base, i64 %index
%ptr2 = getelementptr float, float *%ptr1, i64 100
%val = load float, float *%ptr2
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}

View File

@ -17,12 +17,10 @@ define void @f1(fp128 *%dst, double %val) {
ret void
}
; Check extension from memory.
; FIXME: This should really use LXDB, but there is no strict "extload" yet.
; Check the low end of the LXDB range.
define void @f2(fp128 *%dst, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: ld %f0, 0(%r3)
; CHECK: lxdbr %f0, %f0
; CHECK: lxdb %f0, 0(%r3)
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
@ -33,3 +31,68 @@ define void @f2(fp128 *%dst, double *%ptr) {
ret void
}
; Check the high end of the aligned LXDB range.
define void @f3(fp128 *%dst, double *%base) {
; CHECK-LABEL: f3:
; CHECK: lxdb %f0, 4088(%r3)
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
; Index 511 * 8 bytes/double = displacement 4088, still foldable into LXDB.
%ptr = getelementptr double, double *%base, i64 511
%val = load double, double *%ptr
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f4(fp128 *%dst, double *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r3, 4096
; CHECK: lxdb %f0, 0(%r3)
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
; Offset 512 * 8 = 4096 does not fit the displacement; base adjusted first.
%ptr = getelementptr double, double *%base, i64 512
%val = load double, double *%ptr
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check negative displacements, which also need separate address logic.
define void @f5(fp128 *%dst, double *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r3, -8
; CHECK: lxdb %f0, 0(%r3)
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
; Negative offset (-1 double = -8 bytes) handled by adjusting the base.
%ptr = getelementptr double, double *%base, i64 -1
%val = load double, double *%ptr
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check that LXDB allows indices.
define void @f6(fp128 *%dst, double *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r4, 3
; CHECK: lxdb %f0, 800(%r1,%r3)
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
; Variable index scaled to bytes (shift left 3) becomes the index register;
; the constant 100 doubles folds as displacement 800.
%ptr1 = getelementptr double, double *%base, i64 %index
%ptr2 = getelementptr double, double *%ptr1, i64 100
%val = load double, double *%ptr2
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}

View File

@ -5504,15 +5504,14 @@ define <1 x double> @constrained_vector_fpext_v1f32() {
; S390X-LABEL: constrained_vector_fpext_v1f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI99_0
; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: ldebr %f0, %f0
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fpext_v1f32:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI99_0
; SZ13-NEXT: lde %f0, 0(%r1)
; SZ13-NEXT: wldeb %v24, %f0
; SZ13-NEXT: ldeb %f0, 0(%r1)
; SZ13-NEXT: vlr %v24, %v0
; SZ13-NEXT: br %r14
entry:
%result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
@ -5525,21 +5524,17 @@ define <2 x double> @constrained_vector_fpext_v2f32() {
; S390X-LABEL: constrained_vector_fpext_v2f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI100_0
; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI100_1
; S390X-NEXT: le %f1, 0(%r1)
; S390X-NEXT: ldebr %f2, %f0
; S390X-NEXT: ldebr %f0, %f1
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fpext_v2f32:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI100_0
; SZ13-NEXT: lde %f0, 0(%r1)
; SZ13-NEXT: ldeb %f0, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI100_1
; SZ13-NEXT: lde %f1, 0(%r1)
; SZ13-NEXT: ldebr %f0, %f0
; SZ13-NEXT: ldebr %f1, %f1
; SZ13-NEXT: ldeb %f1, 0(%r1)
; SZ13-NEXT: vmrhg %v24, %v1, %v0
; SZ13-NEXT: br %r14
entry:
@ -5553,16 +5548,15 @@ define void @constrained_vector_fpext_v3f64(<3 x float>* %src, <3 x double>* %de
; S390X-LABEL: constrained_vector_fpext_v3f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: lg %r0, 0(%r2)
; S390X-NEXT: le %f0, 8(%r2)
; S390X-NEXT: sllg %r1, %r0, 32
; S390X-NEXT: ldgr %f1, %r1
; S390X-NEXT: ldgr %f0, %r1
; S390X-NEXT: nilf %r0, 0
; S390X-NEXT: ldeb %f1, 8(%r2)
; S390X-NEXT: ldgr %f2, %r0
; S390X-NEXT: ldebr %f2, %f2
; S390X-NEXT: ldebr %f1, %f1
; S390X-NEXT: ldebr %f0, %f0
; S390X-NEXT: std %f0, 16(%r3)
; S390X-NEXT: std %f1, 8(%r3)
; S390X-NEXT: std %f1, 16(%r3)
; S390X-NEXT: std %f0, 8(%r3)
; S390X-NEXT: std %f2, 0(%r3)
; S390X-NEXT: br %r14
;
@ -5591,34 +5585,26 @@ define <4 x double> @constrained_vector_fpext_v4f32() {
; S390X-LABEL: constrained_vector_fpext_v4f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI102_0
; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: ldeb %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI102_1
; S390X-NEXT: le %f1, 0(%r1)
; S390X-NEXT: ldeb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI102_2
; S390X-NEXT: le %f2, 0(%r1)
; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI102_3
; S390X-NEXT: le %f3, 0(%r1)
; S390X-NEXT: ldebr %f6, %f0
; S390X-NEXT: ldebr %f4, %f1
; S390X-NEXT: ldebr %f2, %f2
; S390X-NEXT: ldebr %f0, %f3
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fpext_v4f32:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI102_0
; SZ13-NEXT: lde %f0, 0(%r1)
; SZ13-NEXT: ldeb %f0, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI102_1
; SZ13-NEXT: lde %f1, 0(%r1)
; SZ13-NEXT: ldebr %f0, %f0
; SZ13-NEXT: ldebr %f1, %f1
; SZ13-NEXT: ldeb %f1, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI102_2
; SZ13-NEXT: vmrhg %v24, %v1, %v0
; SZ13-NEXT: lde %f0, 0(%r1)
; SZ13-NEXT: ldeb %f0, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI102_3
; SZ13-NEXT: lde %f1, 0(%r1)
; SZ13-NEXT: ldebr %f0, %f0
; SZ13-NEXT: ldebr %f1, %f1
; SZ13-NEXT: ldeb %f1, 0(%r1)
; SZ13-NEXT: vmrhg %v26, %v1, %v0
; SZ13-NEXT: br %r14
entry: