1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

[Hexagon] Prefer L2_loadrub_io over L4_loadrub_rr

If the offset is an immediate, select the reg+imm form (Rs+#imm) instead of
materializing the offset in a register just to use Rs+Rt<<#0.

llvm-svn: 317275
This commit is contained in:
Krzysztof Parzyszek 2017-11-02 21:56:59 +00:00
parent 8e98660f03
commit e437332dc6
2 changed files with 89 additions and 49 deletions

View File

@ -1706,28 +1706,27 @@ multiclass Loadxim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
defm: Loadxgim_pat<Load, VT, ValueMod, ImmPred, MI>;
}
// Patterns to select load reg reg-indexed: Rs + Rt<<u2.
multiclass Loadxr_pat<PatFrag Load, ValueType VT, InstHexagon MI> {
let AddedComplexity = 40 in
def: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
(VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
// Pattern to select load reg reg-indexed: Rs + Rt<<u2.
//   Load - load fragment applied to the address (add Rs, (shl Rt, u2)).
//   VT   - value type of the matched load and of the selected result.
//   MI   - instruction emitted with operands (Rs, Rt, u2).
// The shift amount has to satisfy u2_0ImmPred and is forwarded unchanged to
// MI as its immediate operand.
class Loadxr_shl_pat<PatFrag Load, ValueType VT, InstHexagon MI>
: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
(VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
let AddedComplexity = 20 in
def: Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
(VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
}
// Pattern to select load reg reg-indexed: Rs + Rt<<0.
//   Load - load fragment applied to the address (add Rs, Rt).
//   VT   - value type of the matched load and of the selected result.
//   MI   - instruction emitted with operands (Rs, Rt, 0).
// The address is a plain add with no shift, so the third operand of MI is
// the constant 0.
class Loadxr_add_pat<PatFrag Load, ValueType VT, InstHexagon MI>
: Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
(VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
// Patterns to select load reg reg-indexed: Rs + Rt<<u2 with value modifier.
multiclass Loadxrm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
InstHexagon MI> {
let AddedComplexity = 40 in
def: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
(VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>;
// Pattern to select load reg reg-indexed: Rs + Rt<<u2 with value modifier.
//   Load     - load fragment applied to the address (add Rs, (shl Rt, u2)).
//   VT       - value type of the matched load and of the final result.
//   ValueMod - fragment wrapped around the result of MI to produce the VT
//              value.
//   MI       - instruction emitted with operands (Rs, Rt, u2).
// The shift amount has to satisfy u2_0ImmPred and is forwarded unchanged to
// MI as its immediate operand.
class Loadxrm_shl_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
InstHexagon MI>
: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
(VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>;
let AddedComplexity = 20 in
def: Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
(VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>;
}
// Pattern to select load reg reg-indexed: Rs + Rt<<0 with value modifier.
//   Load     - load fragment applied to the address (add Rs, Rt).
//   VT       - value type of the matched load and of the final result.
//   ValueMod - fragment wrapped around the result of MI to produce the VT
//              value.
//   MI       - instruction emitted with operands (Rs, Rt, 0).
// The address is a plain add with no shift, so the third operand of MI is
// the constant 0.
class Loadxrm_add_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
InstHexagon MI>
: Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
(VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>;
// Pattern to select load long-offset reg-indexed: Addr + Rt<<u2.
// Don't match for u2==0, instead use reg+imm for those cases.
@ -1777,17 +1776,19 @@ let AddedComplexity = 20 in {
defm: Loadxi_pat<atomic_load_64, i64, anyimm3, L2_loadrd_io>;
}
defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>;
defm: Loadxim_pat<sextloadi16, i64, ToSext64, anyimm1, L2_loadrh_io>;
defm: Loadxim_pat<sextloadi32, i64, ToSext64, anyimm2, L2_loadri_io>;
// Extending loads that produce an i64 through ToZext64/ToSext64, selected to
// the L2_*_io instructions via Loadxim_pat.
// AddedComplexity is 30 here, which is above the 20 given to the Rs + Rt<<0
// (plain add) patterns in this file. The intent, per the accompanying change
// description, is that an immediate offset is not placed in a register just
// to use the reg+reg form.
let AddedComplexity = 30 in {
// Any-extending loads reuse the zero-extending instructions and ToZext64.
defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
// Zero-extending loads.
defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
// Sign-extending loads.
defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>;
defm: Loadxim_pat<sextloadi16, i64, ToSext64, anyimm1, L2_loadrh_io>;
defm: Loadxim_pat<sextloadi32, i64, ToSext64, anyimm2, L2_loadri_io>;
}
let AddedComplexity = 60 in {
def: Loadxu_pat<extloadi8, i32, anyimm0, L4_loadrub_ur>;
@ -1818,26 +1819,55 @@ let AddedComplexity = 60 in {
def: Loadxum_pat<extloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
}
defm: Loadxr_pat<extloadi8, i32, L4_loadrub_rr>;
defm: Loadxr_pat<zextloadi8, i32, L4_loadrub_rr>;
defm: Loadxr_pat<sextloadi8, i32, L4_loadrb_rr>;
defm: Loadxr_pat<extloadi16, i32, L4_loadruh_rr>;
defm: Loadxr_pat<zextloadi16, i32, L4_loadruh_rr>;
defm: Loadxr_pat<sextloadi16, i32, L4_loadrh_rr>;
defm: Loadxr_pat<load, i32, L4_loadri_rr>;
defm: Loadxr_pat<load, i64, L4_loadrd_rr>;
defm: Loadxr_pat<load, f32, L4_loadri_rr>;
defm: Loadxr_pat<load, f64, L4_loadrd_rr>;
// Rs + Rt<<u2 loads whose result is used directly (no value modifier).
// One pattern per (load fragment, value type, instruction) combination, all
// with AddedComplexity 40.
let AddedComplexity = 40 in {
// Byte and halfword loads into i32; any-extending loads use the same
// instruction as the zero-extending ones.
def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
// Non-extending loads: i32/f32 share L4_loadri_rr, i64/f64 share
// L4_loadrd_rr.
def: Loadxr_shl_pat<load, i32, L4_loadri_rr>;
def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>;
def: Loadxr_shl_pat<load, f32, L4_loadri_rr>;
def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>;
}
defm: Loadxrm_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
defm: Loadxrm_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
defm: Loadxrm_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
defm: Loadxrm_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
defm: Loadxrm_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
defm: Loadxrm_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
defm: Loadxrm_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
defm: Loadxrm_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
defm: Loadxrm_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
// Rs + Rt<<0 loads (plain add address) whose result is used directly.
// AddedComplexity is 20, lower than the 40 used for the shifted Rs + Rt<<u2
// patterns in this file.
let AddedComplexity = 20 in {
// Byte and halfword loads into i32; any-extending loads use the same
// instruction as the zero-extending ones.
def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
// Non-extending loads: i32/f32 share L4_loadri_rr, i64/f64 share
// L4_loadrd_rr.
def: Loadxr_add_pat<load, i32, L4_loadri_rr>;
def: Loadxr_add_pat<load, i64, L4_loadrd_rr>;
def: Loadxr_add_pat<load, f32, L4_loadri_rr>;
def: Loadxr_add_pat<load, f64, L4_loadrd_rr>;
}
// Rs + Rt<<u2 extending loads that produce an i64: the instruction result is
// passed through ToZext64 or ToSext64. All patterns get AddedComplexity 40.
let AddedComplexity = 40 in {
// Any- and zero-extending loads use ToZext64; sign-extending loads use
// ToSext64.
def: Loadxrm_shl_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
def: Loadxrm_shl_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
// All three 32-bit variants select L4_loadri_rr; only the value modifier
// differs.
def: Loadxrm_shl_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}
// Rs + Rt<<0 (plain add address) extending loads that produce an i64: the
// instruction result is passed through ToZext64 or ToSext64.
// AddedComplexity is 20, below the 30 given in this file to the
// Loadxim_pat i64 patterns that select the L2_*_io instructions.
let AddedComplexity = 20 in {
// Any- and zero-extending loads use ToZext64; sign-extending loads use
// ToSext64.
def: Loadxrm_add_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
def: Loadxrm_add_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
// All three 32-bit variants select L4_loadri_rr; only the value modifier
// differs.
def: Loadxrm_add_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}
// Absolute address

View File

@ -54,4 +54,14 @@ b2:
ret i32 %v6
}
; Loads an i8 at a constant offset of 65 bytes from the pointer argument and
; zero-extends it to i64. The expected output addresses the byte as
; base register plus immediate, memub(r0+#65).
; CHECK-LABEL: Prefer_L2_loadrub_io:
; CHECK: memub(r0+#65)
define i64 @Prefer_L2_loadrub_io(i8* %a0) #0 {
b1:
; Address is %a0 advanced by 65 i8 elements.
%v2 = getelementptr i8, i8* %a0, i32 65
%v3 = load i8, i8* %v2
; Widen the loaded byte to the i64 return type with zero extension.
%v4 = zext i8 %v3 to i64
ret i64 %v4
}
attributes #0 = { nounwind readnone }