mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
Allow matching extend-from-memory with strict FP nodes
This implements a small enhancement to https://reviews.llvm.org/D55506. Specifically, while we were able to match strict FP nodes for floating-point extend operations with a register as source, this did not work for operations with memory as source. That is because, for regular operations, an extend from memory is represented as a combined "extload" node (which is a variant of a load SD node), but there is no equivalent for strict FP operations. However, it turns out that even in the absence of an extload node, we can still just match the operations explicitly, e.g. (strict_fpextend (f32 (load node:$ptr))). This patch uses that method to match the LDEB/LXEB/LXDB SystemZ instructions even when the extend uses a strict-FP node. llvm-svn: 364450
This commit is contained in:
parent
a29b8a366a
commit
8564d2aa2a
@ -1224,6 +1224,13 @@ def setle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
def setne : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(setcc node:$lhs, node:$rhs, SETNE)>;
|
||||
|
||||
// We don't have strict FP extended loads as single DAG nodes, but we can
|
||||
// still provide convenience fragments to match those operations.
|
||||
def strict_extloadf32 : PatFrag<(ops node:$ptr),
|
||||
(strict_fpextend (f32 (load node:$ptr)))>;
|
||||
def strict_extloadf64 : PatFrag<(ops node:$ptr),
|
||||
(strict_fpextend (f64 (load node:$ptr)))>;
|
||||
|
||||
// Convenience fragments to match both strict and non-strict fp operations
|
||||
def any_fadd : PatFrags<(ops node:$lhs, node:$rhs),
|
||||
[(strict_fadd node:$lhs, node:$rhs),
|
||||
@ -1291,6 +1298,12 @@ def any_fpround : PatFrags<(ops node:$src),
|
||||
def any_fpextend : PatFrags<(ops node:$src),
|
||||
[(strict_fpextend node:$src),
|
||||
(fpextend node:$src)]>;
|
||||
def any_extloadf32 : PatFrags<(ops node:$ptr),
|
||||
[(strict_extloadf32 node:$ptr),
|
||||
(extloadf32 node:$ptr)]>;
|
||||
def any_extloadf64 : PatFrags<(ops node:$ptr),
|
||||
[(strict_extloadf64 node:$ptr),
|
||||
(extloadf64 node:$ptr)]>;
|
||||
|
||||
multiclass binary_atomic_op_ord<SDNode atomic_op> {
|
||||
def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
|
||||
|
@ -208,14 +208,14 @@ let Predicates = [FeatureNoVectorEnhancements1] in {
|
||||
|
||||
// Extend memory floating-point values to wider representations.
|
||||
let Uses = [FPC], mayRaiseFPException = 1 in {
|
||||
def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
|
||||
def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
|
||||
def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
|
||||
def LDEB : UnaryRXE<"ldeb", 0xED04, any_extloadf32, FP64, 4>;
|
||||
def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
|
||||
def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
|
||||
}
|
||||
let Predicates = [FeatureNoVectorEnhancements1] in {
|
||||
def : Pat<(f128 (extloadf32 bdxaddr12only:$src)),
|
||||
def : Pat<(f128 (any_extloadf32 bdxaddr12only:$src)),
|
||||
(LXEB bdxaddr12only:$src)>;
|
||||
def : Pat<(f128 (extloadf64 bdxaddr12only:$src)),
|
||||
def : Pat<(f128 (any_extloadf64 bdxaddr12only:$src)),
|
||||
(LXDB bdxaddr12only:$src)>;
|
||||
}
|
||||
|
||||
|
@ -1,9 +1,6 @@
|
||||
; Test strict extensions of f32 to f64.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
|
||||
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
|
||||
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
||||
|
||||
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
|
||||
|
||||
@ -17,13 +14,10 @@ define double @f1(float %val) {
|
||||
ret double %res
|
||||
}
|
||||
|
||||
; Check extension from memory.
|
||||
; FIXME: This should really use LDEB, but there is no strict "extload" yet.
|
||||
; Check the low end of the LDEB range.
|
||||
define double @f2(float *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK-SCALAR: le %f0, 0(%r2)
|
||||
; CHECK-VECTOR: lde %f0, 0(%r2)
|
||||
; CHECK: ldebr %f0, %f0
|
||||
; CHECK: ldeb %f0, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load float, float *%ptr
|
||||
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
|
||||
@ -31,3 +25,56 @@ define double @f2(float *%ptr) {
|
||||
ret double %res
|
||||
}
|
||||
|
||||
; Check the high end of the aligned LDEB range.
|
||||
define double @f3(float *%base) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: ldeb %f0, 4092(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr float, float *%base, i64 1023
|
||||
%val = load float, float *%ptr
|
||||
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
|
||||
metadata !"fpexcept.strict")
|
||||
ret double %res
|
||||
}
|
||||
|
||||
; Check the next word up, which needs separate address logic.
|
||||
; Other sequences besides this one would be OK.
|
||||
define double @f4(float *%base) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: ldeb %f0, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr float, float *%base, i64 1024
|
||||
%val = load float, float *%ptr
|
||||
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
|
||||
metadata !"fpexcept.strict")
|
||||
ret double %res
|
||||
}
|
||||
|
||||
; Check negative displacements, which also need separate address logic.
|
||||
define double @f5(float *%base) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: aghi %r2, -4
|
||||
; CHECK: ldeb %f0, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr float, float *%base, i64 -1
|
||||
%val = load float, float *%ptr
|
||||
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
|
||||
metadata !"fpexcept.strict")
|
||||
ret double %res
|
||||
}
|
||||
|
||||
; Check that LDEB allows indices.
|
||||
define double @f6(float *%base, i64 %index) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: sllg %r1, %r3, 2
|
||||
; CHECK: ldeb %f0, 400(%r1,%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr1 = getelementptr float, float *%base, i64 %index
|
||||
%ptr2 = getelementptr float, float *%ptr1, i64 100
|
||||
%val = load float, float *%ptr2
|
||||
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
|
||||
metadata !"fpexcept.strict")
|
||||
ret double %res
|
||||
}
|
||||
|
||||
|
@ -17,12 +17,10 @@ define void @f1(fp128 *%dst, float %val) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check extension from memory.
|
||||
; FIXME: This should really use LXEB, but there is no strict "extload" yet.
|
||||
; Check the low end of the LXEB range.
|
||||
define void @f2(fp128 *%dst, float *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: le %f0, 0(%r3)
|
||||
; CHECK: lxebr %f0, %f0
|
||||
; CHECK: lxeb %f0, 0(%r3)
|
||||
; CHECK: std %f0, 0(%r2)
|
||||
; CHECK: std %f2, 8(%r2)
|
||||
; CHECK: br %r14
|
||||
@ -33,3 +31,68 @@ define void @f2(fp128 *%dst, float *%ptr) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check the high end of the aligned LXEB range.
|
||||
define void @f3(fp128 *%dst, float *%base) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: lxeb %f0, 4092(%r3)
|
||||
; CHECK: std %f0, 0(%r2)
|
||||
; CHECK: std %f2, 8(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr float, float *%base, i64 1023
|
||||
%val = load float, float *%ptr
|
||||
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
|
||||
metadata !"fpexcept.strict")
|
||||
store fp128 %res, fp128 *%dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check the next word up, which needs separate address logic.
|
||||
; Other sequences besides this one would be OK.
|
||||
define void @f4(fp128 *%dst, float *%base) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: aghi %r3, 4096
|
||||
; CHECK: lxeb %f0, 0(%r3)
|
||||
; CHECK: std %f0, 0(%r2)
|
||||
; CHECK: std %f2, 8(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr float, float *%base, i64 1024
|
||||
%val = load float, float *%ptr
|
||||
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
|
||||
metadata !"fpexcept.strict")
|
||||
store fp128 %res, fp128 *%dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check negative displacements, which also need separate address logic.
|
||||
define void @f5(fp128 *%dst, float *%base) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: aghi %r3, -4
|
||||
; CHECK: lxeb %f0, 0(%r3)
|
||||
; CHECK: std %f0, 0(%r2)
|
||||
; CHECK: std %f2, 8(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr float, float *%base, i64 -1
|
||||
%val = load float, float *%ptr
|
||||
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
|
||||
metadata !"fpexcept.strict")
|
||||
store fp128 %res, fp128 *%dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that LXEB allows indices.
|
||||
define void @f6(fp128 *%dst, float *%base, i64 %index) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: sllg %r1, %r4, 2
|
||||
; CHECK: lxeb %f0, 400(%r1,%r3)
|
||||
; CHECK: std %f0, 0(%r2)
|
||||
; CHECK: std %f2, 8(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr1 = getelementptr float, float *%base, i64 %index
|
||||
%ptr2 = getelementptr float, float *%ptr1, i64 100
|
||||
%val = load float, float *%ptr2
|
||||
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
|
||||
metadata !"fpexcept.strict")
|
||||
store fp128 %res, fp128 *%dst
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -17,12 +17,10 @@ define void @f1(fp128 *%dst, double %val) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check extension from memory.
|
||||
; FIXME: This should really use LXDB, but there is no strict "extload" yet.
|
||||
; Check the low end of the LXDB range.
|
||||
define void @f2(fp128 *%dst, double *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: ld %f0, 0(%r3)
|
||||
; CHECK: lxdbr %f0, %f0
|
||||
; CHECK: lxdb %f0, 0(%r3)
|
||||
; CHECK: std %f0, 0(%r2)
|
||||
; CHECK: std %f2, 8(%r2)
|
||||
; CHECK: br %r14
|
||||
@ -33,3 +31,68 @@ define void @f2(fp128 *%dst, double *%ptr) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check the high end of the aligned LXDB range.
|
||||
define void @f3(fp128 *%dst, double *%base) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: lxdb %f0, 4088(%r3)
|
||||
; CHECK: std %f0, 0(%r2)
|
||||
; CHECK: std %f2, 8(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr double, double *%base, i64 511
|
||||
%val = load double, double *%ptr
|
||||
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
|
||||
metadata !"fpexcept.strict")
|
||||
store fp128 %res, fp128 *%dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check the next doubleword up, which needs separate address logic.
|
||||
; Other sequences besides this one would be OK.
|
||||
define void @f4(fp128 *%dst, double *%base) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: aghi %r3, 4096
|
||||
; CHECK: lxdb %f0, 0(%r3)
|
||||
; CHECK: std %f0, 0(%r2)
|
||||
; CHECK: std %f2, 8(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr double, double *%base, i64 512
|
||||
%val = load double, double *%ptr
|
||||
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
|
||||
metadata !"fpexcept.strict")
|
||||
store fp128 %res, fp128 *%dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check negative displacements, which also need separate address logic.
|
||||
define void @f5(fp128 *%dst, double *%base) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: aghi %r3, -8
|
||||
; CHECK: lxdb %f0, 0(%r3)
|
||||
; CHECK: std %f0, 0(%r2)
|
||||
; CHECK: std %f2, 8(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr double, double *%base, i64 -1
|
||||
%val = load double, double *%ptr
|
||||
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
|
||||
metadata !"fpexcept.strict")
|
||||
store fp128 %res, fp128 *%dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that LXDB allows indices.
|
||||
define void @f6(fp128 *%dst, double *%base, i64 %index) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: sllg %r1, %r4, 3
|
||||
; CHECK: lxdb %f0, 800(%r1,%r3)
|
||||
; CHECK: std %f0, 0(%r2)
|
||||
; CHECK: std %f2, 8(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr1 = getelementptr double, double *%base, i64 %index
|
||||
%ptr2 = getelementptr double, double *%ptr1, i64 100
|
||||
%val = load double, double *%ptr2
|
||||
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
|
||||
metadata !"fpexcept.strict")
|
||||
store fp128 %res, fp128 *%dst
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -5504,15 +5504,14 @@ define <1 x double> @constrained_vector_fpext_v1f32() {
|
||||
; S390X-LABEL: constrained_vector_fpext_v1f32:
|
||||
; S390X: # %bb.0: # %entry
|
||||
; S390X-NEXT: larl %r1, .LCPI99_0
|
||||
; S390X-NEXT: le %f0, 0(%r1)
|
||||
; S390X-NEXT: ldebr %f0, %f0
|
||||
; S390X-NEXT: ldeb %f0, 0(%r1)
|
||||
; S390X-NEXT: br %r14
|
||||
;
|
||||
; SZ13-LABEL: constrained_vector_fpext_v1f32:
|
||||
; SZ13: # %bb.0: # %entry
|
||||
; SZ13-NEXT: larl %r1, .LCPI99_0
|
||||
; SZ13-NEXT: lde %f0, 0(%r1)
|
||||
; SZ13-NEXT: wldeb %v24, %f0
|
||||
; SZ13-NEXT: ldeb %f0, 0(%r1)
|
||||
; SZ13-NEXT: vlr %v24, %v0
|
||||
; SZ13-NEXT: br %r14
|
||||
entry:
|
||||
%result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
|
||||
@ -5525,21 +5524,17 @@ define <2 x double> @constrained_vector_fpext_v2f32() {
|
||||
; S390X-LABEL: constrained_vector_fpext_v2f32:
|
||||
; S390X: # %bb.0: # %entry
|
||||
; S390X-NEXT: larl %r1, .LCPI100_0
|
||||
; S390X-NEXT: le %f0, 0(%r1)
|
||||
; S390X-NEXT: ldeb %f2, 0(%r1)
|
||||
; S390X-NEXT: larl %r1, .LCPI100_1
|
||||
; S390X-NEXT: le %f1, 0(%r1)
|
||||
; S390X-NEXT: ldebr %f2, %f0
|
||||
; S390X-NEXT: ldebr %f0, %f1
|
||||
; S390X-NEXT: ldeb %f0, 0(%r1)
|
||||
; S390X-NEXT: br %r14
|
||||
;
|
||||
; SZ13-LABEL: constrained_vector_fpext_v2f32:
|
||||
; SZ13: # %bb.0: # %entry
|
||||
; SZ13-NEXT: larl %r1, .LCPI100_0
|
||||
; SZ13-NEXT: lde %f0, 0(%r1)
|
||||
; SZ13-NEXT: ldeb %f0, 0(%r1)
|
||||
; SZ13-NEXT: larl %r1, .LCPI100_1
|
||||
; SZ13-NEXT: lde %f1, 0(%r1)
|
||||
; SZ13-NEXT: ldebr %f0, %f0
|
||||
; SZ13-NEXT: ldebr %f1, %f1
|
||||
; SZ13-NEXT: ldeb %f1, 0(%r1)
|
||||
; SZ13-NEXT: vmrhg %v24, %v1, %v0
|
||||
; SZ13-NEXT: br %r14
|
||||
entry:
|
||||
@ -5553,16 +5548,15 @@ define void @constrained_vector_fpext_v3f64(<3 x float>* %src, <3 x double>* %de
|
||||
; S390X-LABEL: constrained_vector_fpext_v3f64:
|
||||
; S390X: # %bb.0: # %entry
|
||||
; S390X-NEXT: lg %r0, 0(%r2)
|
||||
; S390X-NEXT: le %f0, 8(%r2)
|
||||
; S390X-NEXT: sllg %r1, %r0, 32
|
||||
; S390X-NEXT: ldgr %f1, %r1
|
||||
; S390X-NEXT: ldgr %f0, %r1
|
||||
; S390X-NEXT: nilf %r0, 0
|
||||
; S390X-NEXT: ldeb %f1, 8(%r2)
|
||||
; S390X-NEXT: ldgr %f2, %r0
|
||||
; S390X-NEXT: ldebr %f2, %f2
|
||||
; S390X-NEXT: ldebr %f1, %f1
|
||||
; S390X-NEXT: ldebr %f0, %f0
|
||||
; S390X-NEXT: std %f0, 16(%r3)
|
||||
; S390X-NEXT: std %f1, 8(%r3)
|
||||
; S390X-NEXT: std %f1, 16(%r3)
|
||||
; S390X-NEXT: std %f0, 8(%r3)
|
||||
; S390X-NEXT: std %f2, 0(%r3)
|
||||
; S390X-NEXT: br %r14
|
||||
;
|
||||
@ -5591,34 +5585,26 @@ define <4 x double> @constrained_vector_fpext_v4f32() {
|
||||
; S390X-LABEL: constrained_vector_fpext_v4f32:
|
||||
; S390X: # %bb.0: # %entry
|
||||
; S390X-NEXT: larl %r1, .LCPI102_0
|
||||
; S390X-NEXT: le %f0, 0(%r1)
|
||||
; S390X-NEXT: ldeb %f6, 0(%r1)
|
||||
; S390X-NEXT: larl %r1, .LCPI102_1
|
||||
; S390X-NEXT: le %f1, 0(%r1)
|
||||
; S390X-NEXT: ldeb %f4, 0(%r1)
|
||||
; S390X-NEXT: larl %r1, .LCPI102_2
|
||||
; S390X-NEXT: le %f2, 0(%r1)
|
||||
; S390X-NEXT: ldeb %f2, 0(%r1)
|
||||
; S390X-NEXT: larl %r1, .LCPI102_3
|
||||
; S390X-NEXT: le %f3, 0(%r1)
|
||||
; S390X-NEXT: ldebr %f6, %f0
|
||||
; S390X-NEXT: ldebr %f4, %f1
|
||||
; S390X-NEXT: ldebr %f2, %f2
|
||||
; S390X-NEXT: ldebr %f0, %f3
|
||||
; S390X-NEXT: ldeb %f0, 0(%r1)
|
||||
; S390X-NEXT: br %r14
|
||||
;
|
||||
; SZ13-LABEL: constrained_vector_fpext_v4f32:
|
||||
; SZ13: # %bb.0: # %entry
|
||||
; SZ13-NEXT: larl %r1, .LCPI102_0
|
||||
; SZ13-NEXT: lde %f0, 0(%r1)
|
||||
; SZ13-NEXT: ldeb %f0, 0(%r1)
|
||||
; SZ13-NEXT: larl %r1, .LCPI102_1
|
||||
; SZ13-NEXT: lde %f1, 0(%r1)
|
||||
; SZ13-NEXT: ldebr %f0, %f0
|
||||
; SZ13-NEXT: ldebr %f1, %f1
|
||||
; SZ13-NEXT: ldeb %f1, 0(%r1)
|
||||
; SZ13-NEXT: larl %r1, .LCPI102_2
|
||||
; SZ13-NEXT: vmrhg %v24, %v1, %v0
|
||||
; SZ13-NEXT: lde %f0, 0(%r1)
|
||||
; SZ13-NEXT: ldeb %f0, 0(%r1)
|
||||
; SZ13-NEXT: larl %r1, .LCPI102_3
|
||||
; SZ13-NEXT: lde %f1, 0(%r1)
|
||||
; SZ13-NEXT: ldebr %f0, %f0
|
||||
; SZ13-NEXT: ldebr %f1, %f1
|
||||
; SZ13-NEXT: ldeb %f1, 0(%r1)
|
||||
; SZ13-NEXT: vmrhg %v26, %v1, %v0
|
||||
; SZ13-NEXT: br %r14
|
||||
entry:
|
||||
|
Loading…
Reference in New Issue
Block a user