mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[Hexagon] Intrinsics for circular and bit-reversed loads and stores
llvm-svn: 232645
This commit is contained in:
parent
e60e76fab6
commit
74e58441b5
@ -422,12 +422,42 @@ class Hexagon_di_didisisi_Intrinsic<string GCCIntSuffix>
|
||||
llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
class Hexagon_mem_memmemsi_Intrinsic<string GCCIntSuffix>
|
||||
: Hexagon_Intrinsic<GCCIntSuffix,
|
||||
[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty,
|
||||
llvm_i32_ty],
|
||||
[IntrReadWriteArgMem]>;
|
||||
|
||||
class Hexagon_mem_memsisi_Intrinsic<string GCCIntSuffix>
|
||||
: Hexagon_Intrinsic<GCCIntSuffix,
|
||||
[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty,
|
||||
llvm_i32_ty],
|
||||
[IntrReadWriteArgMem]>;
|
||||
|
||||
class Hexagon_mem_memdisi_Intrinsic<string GCCIntSuffix>
|
||||
: Hexagon_Intrinsic<GCCIntSuffix,
|
||||
[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty,
|
||||
llvm_i32_ty],
|
||||
[IntrReadWriteArgMem]>;
|
||||
|
||||
class Hexagon_mem_memmemsisi_Intrinsic<string GCCIntSuffix>
|
||||
: Hexagon_Intrinsic<GCCIntSuffix,
|
||||
[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty,
|
||||
llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrReadWriteArgMem]>;
|
||||
|
||||
class Hexagon_mem_memsisisi_Intrinsic<string GCCIntSuffix>
|
||||
: Hexagon_Intrinsic<GCCIntSuffix,
|
||||
[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty,
|
||||
llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrReadWriteArgMem]>;
|
||||
|
||||
class Hexagon_mem_memdisisi_Intrinsic<string GCCIntSuffix>
|
||||
: Hexagon_Intrinsic<GCCIntSuffix,
|
||||
[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty,
|
||||
llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrReadWriteArgMem]>;
|
||||
|
||||
//
|
||||
// Hexagon_sf_df_Intrinsic<string GCCIntSuffix>
|
||||
//
|
||||
@ -606,20 +636,126 @@ class Hexagon_df_dfdfdfqi_Intrinsic<string GCCIntSuffix>
|
||||
[IntrNoMem, Throws]>;
|
||||
|
||||
|
||||
// This one below will not be generated from iset.py.
|
||||
// So make sure, you don't overwrite this one.
|
||||
// This one below will not be auto-generated,
|
||||
// so make sure, you don't overwrite this one.
|
||||
//
|
||||
// BUILTIN_INFO(SI_to_SXTHI_asrh,SI_ftype_SI,1)
|
||||
//
|
||||
def int_hexagon_SI_to_SXTHI_asrh :
|
||||
Hexagon_si_si_Intrinsic<"SI_to_SXTHI_asrh">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_ldd,PTR_ftype_PTRPTRSI,3)
|
||||
//
|
||||
def int_hexagon_brev_ldd :
|
||||
Hexagon_mem_memmemsi_Intrinsic<"brev_ldd">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_ldw,PTR_ftype_PTRPTRSI,3)
|
||||
//
|
||||
def int_hexagon_brev_ldw :
|
||||
Hexagon_mem_memmemsi_Intrinsic<"brev_ldw">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_ldh,PTR_ftype_PTRPTRSI,3)
|
||||
//
|
||||
def int_hexagon_brev_ldh :
|
||||
Hexagon_mem_memmemsi_Intrinsic<"brev_ldh">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_lduh,PTR_ftype_PTRPTRSI,3)
|
||||
//
|
||||
def int_hexagon_brev_lduh :
|
||||
Hexagon_mem_memmemsi_Intrinsic<"brev_lduh">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_ldb,PTR_ftype_PTRPTRSI,3)
|
||||
//
|
||||
def int_hexagon_brev_ldb :
|
||||
Hexagon_mem_memmemsi_Intrinsic<"brev_ldb">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_ldub,PTR_ftype_PTRPTRSI,3)
|
||||
//
|
||||
def int_hexagon_brev_ldub :
|
||||
Hexagon_mem_memmemsi_Intrinsic<"brev_ldub">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_ldd,PTR_ftype_PTRPTRSISI,4)
|
||||
//
|
||||
def int_hexagon_circ_ldd :
|
||||
Hexagon_mem_memmemsisi_Intrinsic<"circ_ldd">;
|
||||
// This one above will not be generated from iset.py.
|
||||
// So make sure, you don't overwrite this one.
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_ldw,PTR_ftype_PTRPTRSISI,4)
|
||||
//
|
||||
def int_hexagon_circ_ldw :
|
||||
Hexagon_mem_memmemsisi_Intrinsic<"circ_ldw">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_ldh,PTR_ftype_PTRPTRSISI,4)
|
||||
//
|
||||
def int_hexagon_circ_ldh :
|
||||
Hexagon_mem_memmemsisi_Intrinsic<"circ_ldh">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_lduh,PTR_ftype_PTRPTRSISI,4)
|
||||
//
|
||||
def int_hexagon_circ_lduh :
|
||||
Hexagon_mem_memmemsisi_Intrinsic<"circ_lduh">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_ldb,PTR_ftype_PTRPTRSISI,4)
|
||||
//
|
||||
def int_hexagon_circ_ldb :
|
||||
Hexagon_mem_memmemsisi_Intrinsic<"circ_ldb">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_ldub,PTR_ftype_PTRPTRSISI,4)
|
||||
//
|
||||
def int_hexagon_circ_ldub :
|
||||
Hexagon_mem_memmemsisi_Intrinsic<"circ_ldub">;
|
||||
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_stb,PTR_ftype_PTRSISI,3)
|
||||
//
|
||||
def int_hexagon_brev_stb :
|
||||
Hexagon_mem_memsisi_Intrinsic<"brev_stb">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_sthhi,PTR_ftype_PTRSISI,3)
|
||||
//
|
||||
def int_hexagon_brev_sthhi :
|
||||
Hexagon_mem_memsisi_Intrinsic<"brev_sthhi">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_sth,PTR_ftype_PTRSISI,3)
|
||||
//
|
||||
def int_hexagon_brev_sth :
|
||||
Hexagon_mem_memsisi_Intrinsic<"brev_sth">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_stw,PTR_ftype_PTRSISI,3)
|
||||
//
|
||||
def int_hexagon_brev_stw :
|
||||
Hexagon_mem_memsisi_Intrinsic<"brev_stw">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(brev_std,PTR_ftype_PTRDISI,3)
|
||||
//
|
||||
def int_hexagon_brev_std :
|
||||
Hexagon_mem_memdisi_Intrinsic<"brev_std">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_std,PTR_ftype_PTRDISISI,4)
|
||||
//
|
||||
def int_hexagon_circ_std :
|
||||
Hexagon_mem_memdisisi_Intrinsic<"circ_std">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_stw,PTR_ftype_PTRSISISI,4)
|
||||
//
|
||||
def int_hexagon_circ_stw :
|
||||
Hexagon_mem_memsisisi_Intrinsic<"circ_stw">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_sth,PTR_ftype_PTRSISISI,4)
|
||||
//
|
||||
def int_hexagon_circ_sth :
|
||||
Hexagon_mem_memsisisi_Intrinsic<"circ_sth">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_sthhi,PTR_ftype_PTRSISISI,4)
|
||||
//
|
||||
def int_hexagon_circ_sthhi :
|
||||
Hexagon_mem_memsisisi_Intrinsic<"circ_sthhi">;
|
||||
//
|
||||
// BUILTIN_INFO_NONCONST(circ_stb,PTR_ftype_PTRSISISI,4)
|
||||
//
|
||||
def int_hexagon_circ_stb :
|
||||
Hexagon_mem_memsisisi_Intrinsic<"circ_stb">;
|
||||
|
||||
|
||||
//
|
||||
// BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2)
|
||||
//
|
||||
|
@ -79,7 +79,166 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
|
||||
++MII) {
|
||||
MachineInstr *MI = MII;
|
||||
int Opc = MI->getOpcode();
|
||||
if (Opc == Hexagon::STriw_pred) {
|
||||
if (Opc == Hexagon::S2_storerb_pci_pseudo ||
|
||||
Opc == Hexagon::S2_storerh_pci_pseudo ||
|
||||
Opc == Hexagon::S2_storeri_pci_pseudo ||
|
||||
Opc == Hexagon::S2_storerd_pci_pseudo ||
|
||||
Opc == Hexagon::S2_storerf_pci_pseudo) {
|
||||
unsigned Opcode;
|
||||
if (Opc == Hexagon::S2_storerd_pci_pseudo)
|
||||
Opcode = Hexagon::S2_storerd_pci;
|
||||
else if (Opc == Hexagon::S2_storeri_pci_pseudo)
|
||||
Opcode = Hexagon::S2_storeri_pci;
|
||||
else if (Opc == Hexagon::S2_storerh_pci_pseudo)
|
||||
Opcode = Hexagon::S2_storerh_pci;
|
||||
else if (Opc == Hexagon::S2_storerf_pci_pseudo)
|
||||
Opcode = Hexagon::S2_storerf_pci;
|
||||
else if (Opc == Hexagon::S2_storerb_pci_pseudo)
|
||||
Opcode = Hexagon::S2_storerb_pci;
|
||||
else
|
||||
llvm_unreachable("wrong Opc");
|
||||
MachineOperand &Op0 = MI->getOperand(0);
|
||||
MachineOperand &Op1 = MI->getOperand(1);
|
||||
MachineOperand &Op2 = MI->getOperand(2);
|
||||
MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
|
||||
MachineOperand &Op4 = MI->getOperand(4);
|
||||
// Emit "C6 = Rn"; C6 is the control register for M0.
|
||||
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
|
||||
Hexagon::C6)->addOperand(Op3);
|
||||
// Replace the pseudo circular store by the real circular store.
|
||||
MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
|
||||
TII->get(Opcode));
|
||||
NewMI->addOperand(Op0);
|
||||
NewMI->addOperand(Op1);
|
||||
NewMI->addOperand(Op4);
|
||||
NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
|
||||
false, /*isDef*/
|
||||
false, /*isImpl*/
|
||||
true /*isKill*/));
|
||||
NewMI->addOperand(Op2);
|
||||
MII = MBB->erase(MI);
|
||||
--MII;
|
||||
} else if (Opc == Hexagon::L2_loadrd_pci_pseudo ||
|
||||
Opc == Hexagon::L2_loadri_pci_pseudo ||
|
||||
Opc == Hexagon::L2_loadrh_pci_pseudo ||
|
||||
Opc == Hexagon::L2_loadruh_pci_pseudo||
|
||||
Opc == Hexagon::L2_loadrb_pci_pseudo ||
|
||||
Opc == Hexagon::L2_loadrub_pci_pseudo) {
|
||||
unsigned Opcode;
|
||||
if (Opc == Hexagon::L2_loadrd_pci_pseudo)
|
||||
Opcode = Hexagon::L2_loadrd_pci;
|
||||
else if (Opc == Hexagon::L2_loadri_pci_pseudo)
|
||||
Opcode = Hexagon::L2_loadri_pci;
|
||||
else if (Opc == Hexagon::L2_loadrh_pci_pseudo)
|
||||
Opcode = Hexagon::L2_loadrh_pci;
|
||||
else if (Opc == Hexagon::L2_loadruh_pci_pseudo)
|
||||
Opcode = Hexagon::L2_loadruh_pci;
|
||||
else if (Opc == Hexagon::L2_loadrb_pci_pseudo)
|
||||
Opcode = Hexagon::L2_loadrb_pci;
|
||||
else if (Opc == Hexagon::L2_loadrub_pci_pseudo)
|
||||
Opcode = Hexagon::L2_loadrub_pci;
|
||||
else
|
||||
llvm_unreachable("wrong Opc");
|
||||
|
||||
MachineOperand &Op0 = MI->getOperand(0);
|
||||
MachineOperand &Op1 = MI->getOperand(1);
|
||||
MachineOperand &Op2 = MI->getOperand(2);
|
||||
MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
|
||||
MachineOperand &Op5 = MI->getOperand(5);
|
||||
// Emit "C6 = Rn"; C6 is the control register for M0.
|
||||
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
|
||||
Hexagon::C6)->addOperand(Op4);
|
||||
// Replace the pseudo circular load by the real circular load.
|
||||
MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
|
||||
TII->get(Opcode));
|
||||
NewMI->addOperand(Op1);
|
||||
NewMI->addOperand(Op0);
|
||||
NewMI->addOperand(Op2);
|
||||
NewMI->addOperand(Op5);
|
||||
NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
|
||||
false, /*isDef*/
|
||||
false, /*isImpl*/
|
||||
true /*isKill*/));
|
||||
MII = MBB->erase(MI);
|
||||
--MII;
|
||||
} else if (Opc == Hexagon::L2_loadrd_pbr_pseudo ||
|
||||
Opc == Hexagon::L2_loadri_pbr_pseudo ||
|
||||
Opc == Hexagon::L2_loadrh_pbr_pseudo ||
|
||||
Opc == Hexagon::L2_loadruh_pbr_pseudo||
|
||||
Opc == Hexagon::L2_loadrb_pbr_pseudo ||
|
||||
Opc == Hexagon::L2_loadrub_pbr_pseudo) {
|
||||
unsigned Opcode;
|
||||
if (Opc == Hexagon::L2_loadrd_pbr_pseudo)
|
||||
Opcode = Hexagon::L2_loadrd_pbr;
|
||||
else if (Opc == Hexagon::L2_loadri_pbr_pseudo)
|
||||
Opcode = Hexagon::L2_loadri_pbr;
|
||||
else if (Opc == Hexagon::L2_loadrh_pbr_pseudo)
|
||||
Opcode = Hexagon::L2_loadrh_pbr;
|
||||
else if (Opc == Hexagon::L2_loadruh_pbr_pseudo)
|
||||
Opcode = Hexagon::L2_loadruh_pbr;
|
||||
else if (Opc == Hexagon::L2_loadrb_pbr_pseudo)
|
||||
Opcode = Hexagon::L2_loadrb_pbr;
|
||||
else if (Opc == Hexagon::L2_loadrub_pbr_pseudo)
|
||||
Opcode = Hexagon::L2_loadrub_pbr;
|
||||
else
|
||||
llvm_unreachable("wrong Opc");
|
||||
MachineOperand &Op0 = MI->getOperand(0);
|
||||
MachineOperand &Op1 = MI->getOperand(1);
|
||||
MachineOperand &Op2 = MI->getOperand(2);
|
||||
MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
|
||||
// Emit "C6 = Rn"; C6 is the control register for M0.
|
||||
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
|
||||
Hexagon::C6)->addOperand(Op4);
|
||||
// Replace the pseudo brev_ldd by the real brev_ldd.
|
||||
MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
|
||||
TII->get(Opcode));
|
||||
NewMI->addOperand(Op1);
|
||||
NewMI->addOperand(Op0);
|
||||
NewMI->addOperand(Op2);
|
||||
NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
|
||||
false, /*isDef*/
|
||||
false, /*isImpl*/
|
||||
true /*isKill*/));
|
||||
MII = MBB->erase(MI);
|
||||
--MII;
|
||||
} else if (Opc == Hexagon::S2_storerd_pbr_pseudo ||
|
||||
Opc == Hexagon::S2_storeri_pbr_pseudo ||
|
||||
Opc == Hexagon::S2_storerh_pbr_pseudo ||
|
||||
Opc == Hexagon::S2_storerb_pbr_pseudo ||
|
||||
Opc == Hexagon::S2_storerf_pbr_pseudo) {
|
||||
unsigned Opcode;
|
||||
if (Opc == Hexagon::S2_storerd_pbr_pseudo)
|
||||
Opcode = Hexagon::S2_storerd_pbr;
|
||||
else if (Opc == Hexagon::S2_storeri_pbr_pseudo)
|
||||
Opcode = Hexagon::S2_storeri_pbr;
|
||||
else if (Opc == Hexagon::S2_storerh_pbr_pseudo)
|
||||
Opcode = Hexagon::S2_storerh_pbr;
|
||||
else if (Opc == Hexagon::S2_storerf_pbr_pseudo)
|
||||
Opcode = Hexagon::S2_storerf_pbr;
|
||||
else if (Opc == Hexagon::S2_storerb_pbr_pseudo)
|
||||
Opcode = Hexagon::S2_storerb_pbr;
|
||||
else
|
||||
llvm_unreachable("wrong Opc");
|
||||
MachineOperand &Op0 = MI->getOperand(0);
|
||||
MachineOperand &Op1 = MI->getOperand(1);
|
||||
MachineOperand &Op2 = MI->getOperand(2);
|
||||
MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
|
||||
// Emit "C6 = Rn"; C6 is the control register for M0.
|
||||
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
|
||||
Hexagon::C6)->addOperand(Op3);
|
||||
// Replace the pseudo bit-reversed store by the real bit-reversed store.
|
||||
MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
|
||||
TII->get(Opcode));
|
||||
NewMI->addOperand(Op0);
|
||||
NewMI->addOperand(Op1);
|
||||
NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
|
||||
false, /*isDef*/
|
||||
false, /*isImpl*/
|
||||
true /*isKill*/));
|
||||
NewMI->addOperand(Op2);
|
||||
MII = MBB->erase(MI);
|
||||
--MII;
|
||||
} else if (Opc == Hexagon::STriw_pred) {
|
||||
// STriw_pred [R30], ofst, SrcReg;
|
||||
unsigned FP = MI->getOperand(0).getReg();
|
||||
assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
|
||||
|
@ -747,6 +747,203 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
|
||||
return SelectCode(N);
|
||||
}
|
||||
|
||||
//
|
||||
// Checking for intrinsics circular load/store, and bitreverse load/store
|
||||
// intrinsics in order to select the correct lowered operation.
|
||||
//
|
||||
SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
|
||||
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
|
||||
if (IntNo == Intrinsic::hexagon_circ_ldd ||
|
||||
IntNo == Intrinsic::hexagon_circ_ldw ||
|
||||
IntNo == Intrinsic::hexagon_circ_lduh ||
|
||||
IntNo == Intrinsic::hexagon_circ_ldh ||
|
||||
IntNo == Intrinsic::hexagon_circ_ldub ||
|
||||
IntNo == Intrinsic::hexagon_circ_ldb) {
|
||||
SDLoc dl(N);
|
||||
SDValue Chain = N->getOperand(0);
|
||||
SDValue Base = N->getOperand(2);
|
||||
SDValue Load = N->getOperand(3);
|
||||
SDValue ModifierExpr = N->getOperand(4);
|
||||
SDValue Offset = N->getOperand(5);
|
||||
|
||||
// We need to add the return type for the load. This intrinsic has
|
||||
// two return types, one for the load and one for the post-increment.
|
||||
// Only the *_ld instructions push the extra return type, and bump the
|
||||
// result node operand number correspondingly.
|
||||
std::vector<EVT> ResTys;
|
||||
unsigned opc;
|
||||
unsigned memsize, align;
|
||||
MVT MvtSize = MVT::i32;
|
||||
|
||||
if (IntNo == Intrinsic::hexagon_circ_ldd) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i64);
|
||||
opc = Hexagon::L2_loadrd_pci_pseudo;
|
||||
memsize = 8;
|
||||
align = 8;
|
||||
} else if (IntNo == Intrinsic::hexagon_circ_ldw) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i32);
|
||||
opc = Hexagon::L2_loadri_pci_pseudo;
|
||||
memsize = 4;
|
||||
align = 4;
|
||||
} else if (IntNo == Intrinsic::hexagon_circ_ldh) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i32);
|
||||
opc = Hexagon::L2_loadrh_pci_pseudo;
|
||||
memsize = 2;
|
||||
align = 2;
|
||||
MvtSize = MVT::i16;
|
||||
} else if (IntNo == Intrinsic::hexagon_circ_lduh) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i32);
|
||||
opc = Hexagon::L2_loadruh_pci_pseudo;
|
||||
memsize = 2;
|
||||
align = 2;
|
||||
MvtSize = MVT::i16;
|
||||
} else if (IntNo == Intrinsic::hexagon_circ_ldb) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i32);
|
||||
opc = Hexagon::L2_loadrb_pci_pseudo;
|
||||
memsize = 1;
|
||||
align = 1;
|
||||
MvtSize = MVT::i8;
|
||||
} else if (IntNo == Intrinsic::hexagon_circ_ldub) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i32);
|
||||
opc = Hexagon::L2_loadrub_pci_pseudo;
|
||||
memsize = 1;
|
||||
align = 1;
|
||||
MvtSize = MVT::i8;
|
||||
} else
|
||||
llvm_unreachable("no opc");
|
||||
|
||||
ResTys.push_back(MVT::Other);
|
||||
|
||||
// Copy over the arguments, which are the same mostly.
|
||||
SmallVector<SDValue, 5> Ops;
|
||||
Ops.push_back(Base);
|
||||
Ops.push_back(Load);
|
||||
Ops.push_back(ModifierExpr);
|
||||
int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
|
||||
Ops.push_back(CurDAG->getTargetConstant(Val, MVT::i32));
|
||||
Ops.push_back(Chain);
|
||||
SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
|
||||
|
||||
SDValue ST;
|
||||
MachineMemOperand *Mem =
|
||||
MF->getMachineMemOperand(MachinePointerInfo(),
|
||||
MachineMemOperand::MOStore, memsize, align);
|
||||
if (MvtSize != MVT::i32)
|
||||
ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
|
||||
MvtSize, Mem);
|
||||
else
|
||||
ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
|
||||
|
||||
SDNode* Store = SelectStore(ST.getNode());
|
||||
|
||||
const SDValue Froms[] = { SDValue(N, 0),
|
||||
SDValue(N, 1) };
|
||||
const SDValue Tos[] = { SDValue(Result, 0),
|
||||
SDValue(Store, 0) };
|
||||
ReplaceUses(Froms, Tos, 2);
|
||||
return Result;
|
||||
}
|
||||
|
||||
if (IntNo == Intrinsic::hexagon_brev_ldd ||
|
||||
IntNo == Intrinsic::hexagon_brev_ldw ||
|
||||
IntNo == Intrinsic::hexagon_brev_ldh ||
|
||||
IntNo == Intrinsic::hexagon_brev_lduh ||
|
||||
IntNo == Intrinsic::hexagon_brev_ldb ||
|
||||
IntNo == Intrinsic::hexagon_brev_ldub) {
|
||||
SDLoc dl(N);
|
||||
SDValue Chain = N->getOperand(0);
|
||||
SDValue Base = N->getOperand(2);
|
||||
SDValue Load = N->getOperand(3);
|
||||
SDValue ModifierExpr = N->getOperand(4);
|
||||
|
||||
// We need to add the return type for the load. This intrinsic has
|
||||
// two return types, one for the load and one for the post-increment.
|
||||
std::vector<EVT> ResTys;
|
||||
unsigned opc;
|
||||
unsigned memsize, align;
|
||||
MVT MvtSize = MVT::i32;
|
||||
|
||||
if (IntNo == Intrinsic::hexagon_brev_ldd) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i64);
|
||||
opc = Hexagon::L2_loadrd_pbr_pseudo;
|
||||
memsize = 8;
|
||||
align = 8;
|
||||
} else if (IntNo == Intrinsic::hexagon_brev_ldw) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i32);
|
||||
opc = Hexagon::L2_loadri_pbr_pseudo;
|
||||
memsize = 4;
|
||||
align = 4;
|
||||
} else if (IntNo == Intrinsic::hexagon_brev_ldh) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i32);
|
||||
opc = Hexagon::L2_loadrh_pbr_pseudo;
|
||||
memsize = 2;
|
||||
align = 2;
|
||||
MvtSize = MVT::i16;
|
||||
} else if (IntNo == Intrinsic::hexagon_brev_lduh) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i32);
|
||||
opc = Hexagon::L2_loadruh_pbr_pseudo;
|
||||
memsize = 2;
|
||||
align = 2;
|
||||
MvtSize = MVT::i16;
|
||||
} else if (IntNo == Intrinsic::hexagon_brev_ldb) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i32);
|
||||
opc = Hexagon::L2_loadrb_pbr_pseudo;
|
||||
memsize = 1;
|
||||
align = 1;
|
||||
MvtSize = MVT::i8;
|
||||
} else if (IntNo == Intrinsic::hexagon_brev_ldub) {
|
||||
ResTys.push_back(MVT::i32);
|
||||
ResTys.push_back(MVT::i32);
|
||||
opc = Hexagon::L2_loadrub_pbr_pseudo;
|
||||
memsize = 1;
|
||||
align = 1;
|
||||
MvtSize = MVT::i8;
|
||||
} else
|
||||
llvm_unreachable("no opc");
|
||||
|
||||
ResTys.push_back(MVT::Other);
|
||||
|
||||
// Copy over the arguments, which are the same mostly.
|
||||
SmallVector<SDValue, 4> Ops;
|
||||
Ops.push_back(Base);
|
||||
Ops.push_back(Load);
|
||||
Ops.push_back(ModifierExpr);
|
||||
Ops.push_back(Chain);
|
||||
SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
|
||||
SDValue ST;
|
||||
MachineMemOperand *Mem =
|
||||
MF->getMachineMemOperand(MachinePointerInfo(),
|
||||
MachineMemOperand::MOStore, memsize, align);
|
||||
if (MvtSize != MVT::i32)
|
||||
ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
|
||||
MvtSize, Mem);
|
||||
else
|
||||
ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
|
||||
|
||||
SDNode* Store = SelectStore(ST.getNode());
|
||||
|
||||
const SDValue Froms[] = { SDValue(N, 0),
|
||||
SDValue(N, 1) };
|
||||
const SDValue Tos[] = { SDValue(Result, 0),
|
||||
SDValue(Store, 0) };
|
||||
ReplaceUses(Froms, Tos, 2);
|
||||
return Result;
|
||||
}
|
||||
|
||||
return SelectCode(N);
|
||||
}
|
||||
|
||||
//
|
||||
// Checking for intrinsics which have predicate registers as operand(s)
|
||||
// and lowering to the actual intrinsic.
|
||||
@ -1055,6 +1252,9 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
|
||||
case ISD::ZERO_EXTEND:
|
||||
return SelectZeroExtend(N);
|
||||
|
||||
case ISD::INTRINSIC_W_CHAIN:
|
||||
return SelectIntrinsicWChain(N);
|
||||
|
||||
case ISD::INTRINSIC_WO_CHAIN:
|
||||
return SelectIntrinsicWOChain(N);
|
||||
}
|
||||
|
@ -1550,7 +1550,6 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
default: llvm_unreachable("Unknown .new type");
|
||||
// store new value byte
|
||||
case Hexagon::S4_storerb_ur:
|
||||
return Hexagon::S4_storerbnew_ur;
|
||||
|
||||
@ -1560,6 +1559,20 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
|
||||
case Hexagon::S4_storeri_ur:
|
||||
return Hexagon::S4_storerinew_ur;
|
||||
|
||||
case Hexagon::S2_storerb_pci:
|
||||
return Hexagon::S2_storerb_pci;
|
||||
|
||||
case Hexagon::S2_storeri_pci:
|
||||
return Hexagon::S2_storeri_pci;
|
||||
|
||||
case Hexagon::S2_storerh_pci:
|
||||
return Hexagon::S2_storerh_pci;
|
||||
|
||||
case Hexagon::S2_storerd_pci:
|
||||
return Hexagon::S2_storerd_pci;
|
||||
|
||||
case Hexagon::S2_storerf_pci:
|
||||
return Hexagon::S2_storerf_pci;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -1257,6 +1257,30 @@ def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))),
|
||||
def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))),
|
||||
(i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>;
|
||||
|
||||
/********************************************************************
|
||||
* ST
|
||||
*********************************************************************/
|
||||
|
||||
class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val>
|
||||
: Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru),
|
||||
(MI I32:$Rs, Val:$Rt, I32:$Ru)>;
|
||||
|
||||
def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>;
|
||||
def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>;
|
||||
def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>;
|
||||
def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>;
|
||||
def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>;
|
||||
|
||||
class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
|
||||
: Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
|
||||
(MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>;
|
||||
|
||||
def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>;
|
||||
def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>;
|
||||
def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>;
|
||||
def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>;
|
||||
def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
|
||||
|
||||
include "HexagonIntrinsicsV3.td"
|
||||
include "HexagonIntrinsicsV4.td"
|
||||
include "HexagonIntrinsicsV5.td"
|
||||
|
140
test/CodeGen/Hexagon/brev_ld.ll
Normal file
140
test/CodeGen/Hexagon/brev_ld.ll
Normal file
@ -0,0 +1,140 @@
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
; RUN: llc -march=hexagon -verify-machineinstrs=true < %s | FileCheck %s
|
||||
; Testing bitreverse load intrinsics:
|
||||
; Q6_bitrev_load_update_D(inputLR, pDelay, nConvLength);
|
||||
; Q6_bitrev_load_update_W(inputLR, pDelay, nConvLength);
|
||||
; Q6_bitrev_load_update_H(inputLR, pDelay, nConvLength);
|
||||
; Q6_bitrev_load_update_UH(inputLR, pDelay, nConvLength);
|
||||
; Q6_bitrev_load_update_UB(inputLR, pDelay, nConvLength);
|
||||
; Q6_bitrev_load_update_B(inputLR, pDelay, nConvLength);
|
||||
; producing these instructions:
|
||||
; r3:2 = memd(r0++m0:brev)
|
||||
; r1 = memw(r0++m0:brev)
|
||||
; r1 = memh(r0++m0:brev)
|
||||
; r1 = memuh(r0++m0:brev)
|
||||
; r1 = memub(r0++m0:brev)
|
||||
; r1 = memb(r0++m0:brev)
|
||||
|
||||
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
|
||||
target triple = "hexagon"
|
||||
|
||||
define i64 @foo(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i64, align 8
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%1 = bitcast i64* %inputLR to i8*
|
||||
%sub = sub i32 13, %shr1
|
||||
%shl = shl i32 1, %sub
|
||||
; CHECK: memd(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
|
||||
%2 = call i8* @llvm.hexagon.brev.ldd(i8* %0, i8* %1, i32 %shl)
|
||||
%3 = bitcast i8* %2 to i64*
|
||||
%4 = load i64, i64* %3, align 8, !tbaa !0
|
||||
ret i64 %4
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.ldd(i8*, i8*, i32) nounwind
|
||||
|
||||
define i32 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i32, align 4
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%1 = bitcast i32* %inputLR to i8*
|
||||
%sub = sub i32 14, %shr1
|
||||
%shl = shl i32 1, %sub
|
||||
; CHECK: memw(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
|
||||
%2 = call i8* @llvm.hexagon.brev.ldw(i8* %0, i8* %1, i32 %shl)
|
||||
%3 = bitcast i8* %2 to i32*
|
||||
%4 = load i32, i32* %3, align 4, !tbaa !2
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.ldw(i8*, i8*, i32) nounwind
|
||||
|
||||
define signext i16 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i16, align 2
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%1 = bitcast i16* %inputLR to i8*
|
||||
%sub = sub i32 15, %shr1
|
||||
%shl = shl i32 1, %sub
|
||||
; CHECK: memh(r{{[0-9]*}} ++ m0:brev)
|
||||
%2 = call i8* @llvm.hexagon.brev.ldh(i8* %0, i8* %1, i32 %shl)
|
||||
%3 = bitcast i8* %2 to i16*
|
||||
%4 = load i16, i16* %3, align 2, !tbaa !3
|
||||
ret i16 %4
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.ldh(i8*, i8*, i32) nounwind
|
||||
|
||||
define zeroext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i16, align 2
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%1 = bitcast i16* %inputLR to i8*
|
||||
%sub = sub i32 15, %shr1
|
||||
%shl = shl i32 1, %sub
|
||||
; CHECK: memuh(r{{[0-9]*}} ++ m0:brev)
|
||||
%2 = call i8* @llvm.hexagon.brev.lduh(i8* %0, i8* %1, i32 %shl)
|
||||
%3 = bitcast i8* %2 to i16*
|
||||
%4 = load i16, i16* %3, align 2, !tbaa !3
|
||||
ret i16 %4
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.lduh(i8*, i8*, i32) nounwind
|
||||
|
||||
define zeroext i8 @foo4(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i8, align 1
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%sub = sub nsw i32 16, %shr1
|
||||
%shl = shl i32 1, %sub
|
||||
; CHECK: memub(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
|
||||
%1 = call i8* @llvm.hexagon.brev.ldub(i8* %0, i8* %inputLR, i32 %shl)
|
||||
%2 = load i8, i8* %1, align 1, !tbaa !0
|
||||
ret i8 %2
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.ldub(i8*, i8*, i32) nounwind
|
||||
|
||||
define zeroext i8 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i8, align 1
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%sub = sub nsw i32 16, %shr1
|
||||
%shl = shl i32 1, %sub
|
||||
; CHECK: memb(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
|
||||
%1 = call i8* @llvm.hexagon.brev.ldb(i8* %0, i8* %inputLR, i32 %shl)
|
||||
%2 = load i8, i8* %1, align 1, !tbaa !0
|
||||
ret i8 %2
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.ldb(i8*, i8*, i32) nounwind
|
||||
|
||||
!0 = !{!"omnipotent char", !1}
|
||||
!1 = !{!"Simple C/C++ TBAA"}
|
||||
!2 = !{!"int", !0}
|
||||
!3 = !{!"short", !0}
|
112
test/CodeGen/Hexagon/brev_st.ll
Normal file
112
test/CodeGen/Hexagon/brev_st.ll
Normal file
@ -0,0 +1,112 @@
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
; RUN: llc -march=hexagon -verify-machineinstrs=true < %s | FileCheck %s
|
||||
; Test these 5 bitreverse store intrinsics:
|
||||
; Q6_bitrev_store_update_D(inputLR, pDelay, nConvLength);
|
||||
; Q6_bitrev_store_update_W(inputLR, pDelay, nConvLength);
|
||||
; Q6_bitrev_store_update_HL(inputLR, pDelay, nConvLength);
|
||||
; Q6_bitrev_store_update_HH(inputLR, pDelay, nConvLength);
|
||||
; Q6_bitrev_store_update_B(inputLR, pDelay, nConvLength);
|
||||
; producing these instructions:
|
||||
; memd(r0++m0:brev) = r1:0
|
||||
; memw(r0++m0:brev) = r0
|
||||
; memh(r0++m0:brev) = r3
|
||||
; memh(r0++m0:brev) = r3.h
|
||||
; memb(r0++m0:brev) = r3
|
||||
|
||||
; ModuleID = 'brev_st.i'
|
||||
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
|
||||
target triple = "hexagon"
|
||||
|
||||
; Bit-reversed doubleword store test.
; Calls @llvm.hexagon.brev.std with the address of filtMemLR[filtMemIndex], an
; undef i64 as the stored value (only the addressing form is checked) and
; 1 << (13 - (filtMemLen >> 1)) as the third argument. The pointer returned by
; the intrinsic is then read back as an i64 and returned. The FileCheck pattern
; below expects a memd instruction using the ":brev" addressing form.
define i64 @foo(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr2 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%sub = sub i32 13, %shr2
|
||||
%shl = shl i32 1, %sub
|
||||
; CHECK: memd(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
|
||||
%1 = tail call i8* @llvm.hexagon.brev.std(i8* %0, i64 undef, i32 %shl)
|
||||
%2 = bitcast i8* %1 to i64*
|
||||
%3 = load i64, i64* %2, align 8, !tbaa !0
|
||||
ret i64 %3
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.std(i8*, i64, i32) nounwind
|
||||
|
||||
; Bit-reversed word store test.
; Calls @llvm.hexagon.brev.stw with the address of filtMemLR[filtMemIndex], an
; undef i32 as the stored value and 1 << (14 - (filtMemLen >> 1)) as the third
; argument. The pointer returned by the intrinsic is then read back as an i32
; and returned. The FileCheck pattern below expects a memw instruction using
; the ":brev" addressing form.
define i32 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%sub = sub i32 14, %shr1
|
||||
%shl = shl i32 1, %sub
|
||||
; CHECK: memw(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
|
||||
%1 = tail call i8* @llvm.hexagon.brev.stw(i8* %0, i32 undef, i32 %shl)
|
||||
%2 = bitcast i8* %1 to i32*
|
||||
%3 = load i32, i32* %2, align 4, !tbaa !2
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.stw(i8*, i32, i32) nounwind
|
||||
|
||||
; Bit-reversed halfword store test.
; Calls @llvm.hexagon.brev.sth with the address of filtMemLR[filtMemIndex], the
; constant 0 as the stored value and 1 << (15 - (filtMemLen >> 1)) as the third
; argument. The pointer returned by the intrinsic is then read back as an i16
; and returned. The FileCheck pattern below expects a memh instruction using
; the ":brev" addressing form.
define signext i16 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr2 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%sub = sub i32 15, %shr2
|
||||
%shl = shl i32 1, %sub
|
||||
; CHECK: memh(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
|
||||
%1 = tail call i8* @llvm.hexagon.brev.sth(i8* %0, i32 0, i32 %shl)
|
||||
%2 = bitcast i8* %1 to i16*
|
||||
%3 = load i16, i16* %2, align 2, !tbaa !3
|
||||
ret i16 %3
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.sth(i8*, i32, i32) nounwind
|
||||
|
||||
; Bit-reversed high-halfword store test.
; Same shape as the sth test, but calls @llvm.hexagon.brev.sthhi: address of
; filtMemLR[filtMemIndex], constant 0 as the value operand and
; 1 << (15 - (filtMemLen >> 1)) as the third argument. The FileCheck pattern
; below additionally requires the stored source register to carry the ".h"
; suffix, which is what distinguishes this form from the plain memh store.
define signext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr2 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%sub = sub i32 15, %shr2
|
||||
%shl = shl i32 1, %sub
|
||||
; CHECK: memh(r{{[0-9]*}} ++ m{{[0-1]}}:brev){{ *}}={{ *}}r{{[0-9]*}}.h
|
||||
%1 = tail call i8* @llvm.hexagon.brev.sthhi(i8* %0, i32 0, i32 %shl)
|
||||
%2 = bitcast i8* %1 to i16*
|
||||
%3 = load i16, i16* %2, align 2, !tbaa !3
|
||||
ret i16 %3
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.sthhi(i8*, i32, i32) nounwind
|
||||
|
||||
; Bit-reversed byte store test.
; Calls @llvm.hexagon.brev.stb with the address of filtMemLR[filtMemIndex], the
; constant 0 as the stored value and 1 << (16 - (filtMemLen >> 1)) as the third
; argument. The byte at the pointer returned by the intrinsic is then loaded
; and returned. The FileCheck pattern below expects a memb instruction using
; the ":brev" addressing form.
define zeroext i8 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr2 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%sub = sub nsw i32 16, %shr2
|
||||
; CHECK: memb(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
|
||||
%shl = shl i32 1, %sub
|
||||
%1 = tail call i8* @llvm.hexagon.brev.stb(i8* %0, i32 0, i32 %shl)
|
||||
%2 = load i8, i8* %1, align 1, !tbaa !0
|
||||
ret i8 %2
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.brev.stb(i8*, i32, i32) nounwind
|
||||
|
||||
!0 = !{!"omnipotent char", !1}
|
||||
!1 = !{!"Simple C/C++ TBAA"}
|
||||
!2 = !{!"int", !0}
|
||||
!3 = !{!"short", !0}
|
135
test/CodeGen/Hexagon/circ_ld.ll
Normal file
135
test/CodeGen/Hexagon/circ_ld.ll
Normal file
@ -0,0 +1,135 @@
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
; Testing for these 6 variants of circular load:
|
||||
; Q6_circ_load_update_B(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; Q6_circ_load_update_D(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; Q6_circ_load_update_H(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; Q6_circ_load_update_UB(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; Q6_circ_load_update_UH(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; Q6_circ_load_update_W(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; producing these:
|
||||
; r0 = memb(r1++#-1:circ(m0))
|
||||
; r3:2 = memd(r1++#-8:circ(m0))
|
||||
; r0 = memh(r1++#-2:circ(m0))
|
||||
; r0 = memub(r1++#-1:circ(m0))
|
||||
; r0 = memuh(r1++#-2:circ(m0))
|
||||
; r0 = memw(r1++#-4:circ(m0))
|
||||
|
||||
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
|
||||
target triple = "hexagon"
|
||||
|
||||
; Circular byte load test.
; Calls @llvm.hexagon.circ.ldb with the address of filtMemLR[filtMemIndex], a
; local i8 slot (%inputLR), the value (filtMemLen >> 1) | 33554432 (0x02000000)
; as the third argument and an increment of -1. The byte at the pointer
; returned by the intrinsic is then loaded and returned. The FileCheck pattern
; below expects a memb instruction with a "#-1:circ(mN)" operand.
define zeroext i8 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i8, align 1
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%or = or i32 %shr1, 33554432
|
||||
; CHECK: memb(r{{[0-9]*.}}++{{.}}#-1:circ(m{{[0-1]}}))
|
||||
%1 = call i8* @llvm.hexagon.circ.ldb(i8* %0, i8* %inputLR, i32 %or, i32 -1)
|
||||
%2 = load i8, i8* %1, align 1, !tbaa !0
|
||||
ret i8 %2
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.ldb(i8*, i8*, i32, i32) nounwind
|
||||
|
||||
; Circular doubleword load test.
; Calls @llvm.hexagon.circ.ldd with the address of filtMemLR[filtMemIndex], a
; local i64 slot (%inputLR, passed as i8*), the value
; ((filtMemLen >> 1) << 3) | 83886080 (0x05000000) as the third argument and an
; increment of -8. The pointer returned by the intrinsic is then read as an i64
; and returned. The FileCheck pattern below expects a memd instruction with a
; "#-8:circ(mN)" operand.
define i64 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i64, align 8
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%1 = bitcast i64* %inputLR to i8*
|
||||
%shl = shl nuw nsw i32 %shr1, 3
|
||||
%or = or i32 %shl, 83886080
|
||||
; CHECK: memd(r{{[0-9]*.}}++{{.}}#-8:circ(m{{[0-1]}}))
|
||||
%2 = call i8* @llvm.hexagon.circ.ldd(i8* %0, i8* %1, i32 %or, i32 -8)
|
||||
%3 = bitcast i8* %2 to i64*
|
||||
%4 = load i64, i64* %3, align 8, !tbaa !0
|
||||
ret i64 %4
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.ldd(i8*, i8*, i32, i32) nounwind
|
||||
|
||||
; Circular halfword load test.
; Calls @llvm.hexagon.circ.ldh with the address of filtMemLR[filtMemIndex], a
; local i16 slot (%inputLR, passed as i8*), the value
; (filtMemLen & 65534) | 50331648 (0x03000000) as the third argument and an
; increment of -2. The pointer returned by the intrinsic is then read as an i16
; and returned. The FileCheck pattern below expects a memh instruction with a
; "#-2:circ(mN)" operand.
define signext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i16, align 2
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = and i32 %conv, 65534
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%1 = bitcast i16* %inputLR to i8*
|
||||
%or = or i32 %shr1, 50331648
|
||||
; CHECK: memh(r{{[0-9]*.}}++{{.}}#-2:circ(m{{[0-1]}}))
|
||||
%2 = call i8* @llvm.hexagon.circ.ldh(i8* %0, i8* %1, i32 %or, i32 -2)
|
||||
%3 = bitcast i8* %2 to i16*
|
||||
%4 = load i16, i16* %3, align 2, !tbaa !2
|
||||
ret i16 %4
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.ldh(i8*, i8*, i32, i32) nounwind
|
||||
|
||||
; Circular unsigned-byte load test.
; Calls @llvm.hexagon.circ.ldub with the address of filtMemLR[filtMemIndex], a
; local i8 slot (%inputLR), the value (filtMemLen >> 1) | 33554432 (0x02000000)
; as the third argument and an increment of -1. The byte at the pointer
; returned by the intrinsic is then loaded and returned. The FileCheck pattern
; below expects a memub instruction with a "#-1:circ(mN)" operand.
define zeroext i8 @foo4(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i8, align 1
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%or = or i32 %shr1, 33554432
|
||||
; CHECK: memub(r{{[0-9]*.}}++{{.}}#-1:circ(m{{[0-1]}}))
|
||||
%1 = call i8* @llvm.hexagon.circ.ldub(i8* %0, i8* %inputLR, i32 %or, i32 -1)
|
||||
%2 = load i8, i8* %1, align 1, !tbaa !0
|
||||
ret i8 %2
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.ldub(i8*, i8*, i32, i32) nounwind
|
||||
|
||||
; Circular unsigned-halfword load test.
; Calls @llvm.hexagon.circ.lduh with the address of filtMemLR[filtMemIndex], a
; local i16 slot (%inputLR, passed as i8*), the value
; (filtMemLen & 65534) | 50331648 (0x03000000) as the third argument and an
; increment of -2. The pointer returned by the intrinsic is then read as an i16
; and returned. The FileCheck pattern below expects a memuh instruction with a
; "#-2:circ(mN)" operand.
define zeroext i16 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i16, align 2
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = and i32 %conv, 65534
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%1 = bitcast i16* %inputLR to i8*
|
||||
%or = or i32 %shr1, 50331648
|
||||
; CHECK: memuh(r{{[0-9]*.}}++{{.}}#-2:circ(m{{[0-1]}}))
|
||||
%2 = call i8* @llvm.hexagon.circ.lduh(i8* %0, i8* %1, i32 %or, i32 -2)
|
||||
%3 = bitcast i8* %2 to i16*
|
||||
%4 = load i16, i16* %3, align 2, !tbaa !2
|
||||
ret i16 %4
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.lduh(i8*, i8*, i32, i32) nounwind
|
||||
|
||||
; Circular word load test.
; Calls @llvm.hexagon.circ.ldw with the address of filtMemLR[filtMemIndex], a
; local i32 slot (%inputLR, passed as i8*), the value
; ((filtMemLen >> 1) << 2) | 67108864 (0x04000000) as the third argument and an
; increment of -4. The pointer returned by the intrinsic is then read as an i32
; and returned. The FileCheck pattern below expects a memw instruction with a
; "#-4:circ(mN)" operand.
define i32 @foo6(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%inputLR = alloca i32, align 4
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%1 = bitcast i32* %inputLR to i8*
|
||||
%shl = shl nuw nsw i32 %shr1, 2
|
||||
%or = or i32 %shl, 67108864
|
||||
; CHECK: memw(r{{[0-9]*.}}++{{.}}#-4:circ(m{{[0-1]}}))
|
||||
%2 = call i8* @llvm.hexagon.circ.ldw(i8* %0, i8* %1, i32 %or, i32 -4)
|
||||
%3 = bitcast i8* %2 to i32*
|
||||
%4 = load i32, i32* %3, align 4, !tbaa !3
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.ldw(i8*, i8*, i32, i32) nounwind
|
||||
|
||||
!0 = !{!"omnipotent char", !1}
|
||||
!1 = !{!"Simple C/C++ TBAA"}
|
||||
!2 = !{!"short", !0}
|
||||
!3 = !{!"int", !0}
|
255
test/CodeGen/Hexagon/circ_ldd_bug.ll
Normal file
255
test/CodeGen/Hexagon/circ_ldd_bug.ll
Normal file
@ -0,0 +1,255 @@
|
||||
; RUN: llc -O2 < %s
|
||||
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
|
||||
target triple = "hexagon"
|
||||
|
||||
; We would fail on this file with:
|
||||
; Unimplemented
|
||||
; UNREACHABLE executed at llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp:615!
|
||||
; This happened because after unrolling a loop with a ldd_circ instruction we
|
||||
; would have several TFCR and ldd_circ instruction sequences.
|
||||
; %vreg0 (CRRegs) = TFCR %vreg0 (IntRegs)
|
||||
; = ldd_circ( , , vreg0)
|
||||
; %vreg1 (CRRegs) = TFCR %vreg1 (IntRegs)
|
||||
; = ldd_circ( , , vreg1)
|
||||
; The scheduler would move the CRRegs to the top of the loop. The allocator
|
||||
; would try to spill the CRRegs after running out of them. We don't have code to
|
||||
; spill CRRegs and the above assertion would be triggered.
|
||||
declare i8* @llvm.hexagon.circ.ldd(i8*, i8*, i32, i32) nounwind
|
||||
|
||||
; Compile-only reproducer: a multiply-accumulate loop whose second operand is
; fetched with @llvm.hexagon.circ.ldd, already unrolled by 8 in the IR.
; Every circ.ldd call uses the same third argument
; (%or = ((var0 >> 1) << 3) | 83886080) and the same increment (-8), and passes
; the local i64 slot %var4 (as %3) as its second argument; %var4 is re-loaded
; after each call and fed to @llvm.hexagon.M2.vdmacs.s1 together with the next
; i64 read through %var3. The accumulated i64 is finally narrowed with
; @llvm.hexagon.S2.vrndpackwhs and returned.
define i32 @test(i16 zeroext %var0, i16* %var1, i16 signext %var2, i16* nocapture %var3) nounwind {
|
||||
entry:
|
||||
%var4 = alloca i64, align 8
|
||||
%conv = zext i16 %var0 to i32
|
||||
%shr5 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %var2 to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %var1, i32 %idxprom
|
||||
%0 = bitcast i16* %var3 to i64*
|
||||
%1 = load i64, i64* %0, align 8, !tbaa !1
|
||||
%2 = bitcast i16* %arrayidx to i8*
|
||||
%3 = bitcast i64* %var4 to i8*
|
||||
%shl = shl nuw nsw i32 %shr5, 3
|
||||
%or = or i32 %shl, 83886080
|
||||
%4 = call i8* @llvm.hexagon.circ.ldd(i8* %2, i8* %3, i32 %or, i32 -8)
|
||||
%sub = add nsw i32 %shr5, -1
|
||||
%cmp6 = icmp sgt i32 %sub, 0
|
||||
%5 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%6 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 0, i64 %1, i64 %5)
|
||||
br i1 %cmp6, label %for.body.lr.ph, label %for.end
|
||||
|
||||
; Loop preheader: %xtraiter = ((var0 >> 1) - 1) % 8 is the number of remainder
; steps to run before entering the 8-way unrolled loop (0 skips straight to it).
for.body.lr.ph: ; preds = %entry
|
||||
%incdec.ptr = getelementptr inbounds i16, i16* %var3, i32 4
|
||||
%7 = bitcast i16* %incdec.ptr to i64*
|
||||
%8 = zext i16 %var0 to i32
|
||||
%9 = lshr i32 %8, 1
|
||||
%10 = add i32 %9, -1
|
||||
%xtraiter = urem i32 %10, 8
|
||||
%lcmp = icmp ne i32 %xtraiter, 0
|
||||
br i1 %lcmp, label %unr.cmp60, label %for.body.lr.ph.split.split
|
||||
|
||||
; Dispatch chain: compares %xtraiter against 1..6 in turn and jumps into the
; fall-through sequence of remainder blocks at the matching entry point; the
; final else-branch (reached when none of 1..6 matched) starts at for.body.unr.
unr.cmp60: ; preds = %for.body.lr.ph
|
||||
%un.tmp61 = icmp eq i32 %xtraiter, 1
|
||||
br i1 %un.tmp61, label %for.body.unr53, label %unr.cmp51
|
||||
|
||||
unr.cmp51: ; preds = %unr.cmp60
|
||||
%un.tmp52 = icmp eq i32 %xtraiter, 2
|
||||
br i1 %un.tmp52, label %for.body.unr44, label %unr.cmp42
|
||||
|
||||
unr.cmp42: ; preds = %unr.cmp51
|
||||
%un.tmp43 = icmp eq i32 %xtraiter, 3
|
||||
br i1 %un.tmp43, label %for.body.unr35, label %unr.cmp33
|
||||
|
||||
unr.cmp33: ; preds = %unr.cmp42
|
||||
%un.tmp34 = icmp eq i32 %xtraiter, 4
|
||||
br i1 %un.tmp34, label %for.body.unr26, label %unr.cmp24
|
||||
|
||||
unr.cmp24: ; preds = %unr.cmp33
|
||||
%un.tmp25 = icmp eq i32 %xtraiter, 5
|
||||
br i1 %un.tmp25, label %for.body.unr17, label %unr.cmp
|
||||
|
||||
unr.cmp: ; preds = %unr.cmp24
|
||||
%un.tmp = icmp eq i32 %xtraiter, 6
|
||||
br i1 %un.tmp, label %for.body.unr13, label %for.body.unr
|
||||
|
||||
; Remainder blocks (for.body.unr .. for.body.unr53): each performs one
; circ.ldd + vdmacs.s1 step and branches unconditionally to the next block.
for.body.unr: ; preds = %unr.cmp
|
||||
%11 = call i8* @llvm.hexagon.circ.ldd(i8* %4, i8* %3, i32 %or, i32 -8)
|
||||
%12 = load i64, i64* %7, align 8, !tbaa !1
|
||||
%inc.unr = add nsw i32 0, 1
|
||||
%incdec.ptr4.unr = getelementptr inbounds i64, i64* %7, i32 1
|
||||
%cmp.unr = icmp slt i32 %inc.unr, %sub
|
||||
%13 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%14 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %6, i64 %12, i64 %13)
|
||||
br label %for.body.unr13
|
||||
|
||||
for.body.unr13: ; preds = %for.body.unr, %unr.cmp
|
||||
%15 = phi i64 [ %6, %unr.cmp ], [ %14, %for.body.unr ]
|
||||
%pvar6.09.unr = phi i64* [ %7, %unr.cmp ], [ %incdec.ptr4.unr, %for.body.unr ]
|
||||
%var8.0.in8.unr = phi i8* [ %4, %unr.cmp ], [ %11, %for.body.unr ]
|
||||
%i.07.unr = phi i32 [ 0, %unr.cmp ], [ %inc.unr, %for.body.unr ]
|
||||
%16 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr, i8* %3, i32 %or, i32 -8)
|
||||
%17 = load i64, i64* %pvar6.09.unr, align 8, !tbaa !1
|
||||
%inc.unr14 = add nsw i32 %i.07.unr, 1
|
||||
%incdec.ptr4.unr15 = getelementptr inbounds i64, i64* %pvar6.09.unr, i32 1
|
||||
%cmp.unr16 = icmp slt i32 %inc.unr14, %sub
|
||||
%18 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%19 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %15, i64 %17, i64 %18)
|
||||
br label %for.body.unr17
|
||||
|
||||
for.body.unr17: ; preds = %for.body.unr13, %unr.cmp24
|
||||
%20 = phi i64 [ %6, %unr.cmp24 ], [ %19, %for.body.unr13 ]
|
||||
%pvar6.09.unr18 = phi i64* [ %7, %unr.cmp24 ], [ %incdec.ptr4.unr15, %for.body.unr13 ]
|
||||
%var8.0.in8.unr19 = phi i8* [ %4, %unr.cmp24 ], [ %16, %for.body.unr13 ]
|
||||
%i.07.unr20 = phi i32 [ 0, %unr.cmp24 ], [ %inc.unr14, %for.body.unr13 ]
|
||||
%21 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr19, i8* %3, i32 %or, i32 -8)
|
||||
%22 = load i64, i64* %pvar6.09.unr18, align 8, !tbaa !1
|
||||
%inc.unr21 = add nsw i32 %i.07.unr20, 1
|
||||
%incdec.ptr4.unr22 = getelementptr inbounds i64, i64* %pvar6.09.unr18, i32 1
|
||||
%cmp.unr23 = icmp slt i32 %inc.unr21, %sub
|
||||
%23 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%24 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %20, i64 %22, i64 %23)
|
||||
br label %for.body.unr26
|
||||
|
||||
for.body.unr26: ; preds = %for.body.unr17, %unr.cmp33
|
||||
%25 = phi i64 [ %6, %unr.cmp33 ], [ %24, %for.body.unr17 ]
|
||||
%pvar6.09.unr27 = phi i64* [ %7, %unr.cmp33 ], [ %incdec.ptr4.unr22, %for.body.unr17 ]
|
||||
%var8.0.in8.unr28 = phi i8* [ %4, %unr.cmp33 ], [ %21, %for.body.unr17 ]
|
||||
%i.07.unr29 = phi i32 [ 0, %unr.cmp33 ], [ %inc.unr21, %for.body.unr17 ]
|
||||
%26 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr28, i8* %3, i32 %or, i32 -8)
|
||||
%27 = load i64, i64* %pvar6.09.unr27, align 8, !tbaa !1
|
||||
%inc.unr30 = add nsw i32 %i.07.unr29, 1
|
||||
%incdec.ptr4.unr31 = getelementptr inbounds i64, i64* %pvar6.09.unr27, i32 1
|
||||
%cmp.unr32 = icmp slt i32 %inc.unr30, %sub
|
||||
%28 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%29 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %25, i64 %27, i64 %28)
|
||||
br label %for.body.unr35
|
||||
|
||||
for.body.unr35: ; preds = %for.body.unr26, %unr.cmp42
|
||||
%30 = phi i64 [ %6, %unr.cmp42 ], [ %29, %for.body.unr26 ]
|
||||
%pvar6.09.unr36 = phi i64* [ %7, %unr.cmp42 ], [ %incdec.ptr4.unr31, %for.body.unr26 ]
|
||||
%var8.0.in8.unr37 = phi i8* [ %4, %unr.cmp42 ], [ %26, %for.body.unr26 ]
|
||||
%i.07.unr38 = phi i32 [ 0, %unr.cmp42 ], [ %inc.unr30, %for.body.unr26 ]
|
||||
%31 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr37, i8* %3, i32 %or, i32 -8)
|
||||
%32 = load i64, i64* %pvar6.09.unr36, align 8, !tbaa !1
|
||||
%inc.unr39 = add nsw i32 %i.07.unr38, 1
|
||||
%incdec.ptr4.unr40 = getelementptr inbounds i64, i64* %pvar6.09.unr36, i32 1
|
||||
%cmp.unr41 = icmp slt i32 %inc.unr39, %sub
|
||||
%33 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%34 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %30, i64 %32, i64 %33)
|
||||
br label %for.body.unr44
|
||||
|
||||
for.body.unr44: ; preds = %for.body.unr35, %unr.cmp51
|
||||
%35 = phi i64 [ %6, %unr.cmp51 ], [ %34, %for.body.unr35 ]
|
||||
%pvar6.09.unr45 = phi i64* [ %7, %unr.cmp51 ], [ %incdec.ptr4.unr40, %for.body.unr35 ]
|
||||
%var8.0.in8.unr46 = phi i8* [ %4, %unr.cmp51 ], [ %31, %for.body.unr35 ]
|
||||
%i.07.unr47 = phi i32 [ 0, %unr.cmp51 ], [ %inc.unr39, %for.body.unr35 ]
|
||||
%36 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr46, i8* %3, i32 %or, i32 -8)
|
||||
%37 = load i64, i64* %pvar6.09.unr45, align 8, !tbaa !1
|
||||
%inc.unr48 = add nsw i32 %i.07.unr47, 1
|
||||
%incdec.ptr4.unr49 = getelementptr inbounds i64, i64* %pvar6.09.unr45, i32 1
|
||||
%cmp.unr50 = icmp slt i32 %inc.unr48, %sub
|
||||
%38 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%39 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %35, i64 %37, i64 %38)
|
||||
br label %for.body.unr53
|
||||
|
||||
for.body.unr53: ; preds = %for.body.unr44, %unr.cmp60
|
||||
%40 = phi i64 [ %6, %unr.cmp60 ], [ %39, %for.body.unr44 ]
|
||||
%pvar6.09.unr54 = phi i64* [ %7, %unr.cmp60 ], [ %incdec.ptr4.unr49, %for.body.unr44 ]
|
||||
%var8.0.in8.unr55 = phi i8* [ %4, %unr.cmp60 ], [ %36, %for.body.unr44 ]
|
||||
%i.07.unr56 = phi i32 [ 0, %unr.cmp60 ], [ %inc.unr48, %for.body.unr44 ]
|
||||
%41 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr55, i8* %3, i32 %or, i32 -8)
|
||||
%42 = load i64, i64* %pvar6.09.unr54, align 8, !tbaa !1
|
||||
%inc.unr57 = add nsw i32 %i.07.unr56, 1
|
||||
%incdec.ptr4.unr58 = getelementptr inbounds i64, i64* %pvar6.09.unr54, i32 1
|
||||
%cmp.unr59 = icmp slt i32 %inc.unr57, %sub
|
||||
%43 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%44 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %40, i64 %42, i64 %43)
|
||||
br label %for.body.lr.ph.split
|
||||
|
||||
; After the remainder steps: if the pre-unroll trip count %10 is below 8 there
; is no full unrolled iteration left, so exit; otherwise enter the main loop.
for.body.lr.ph.split: ; preds = %for.body.unr53
|
||||
%45 = icmp ult i32 %10, 8
|
||||
br i1 %45, label %for.end.loopexit, label %for.body.lr.ph.split.split
|
||||
|
||||
for.body.lr.ph.split.split: ; preds = %for.body.lr.ph.split, %for.body.lr.ph
|
||||
%.unr = phi i64 [ %44, %for.body.lr.ph.split ], [ %6, %for.body.lr.ph ]
|
||||
%pvar6.09.unr62 = phi i64* [ %incdec.ptr4.unr58, %for.body.lr.ph.split ], [ %7, %for.body.lr.ph ]
|
||||
%var8.0.in8.unr63 = phi i8* [ %41, %for.body.lr.ph.split ], [ %4, %for.body.lr.ph ]
|
||||
%i.07.unr64 = phi i32 [ %inc.unr57, %for.body.lr.ph.split ], [ 0, %for.body.lr.ph ]
|
||||
%.lcssa12.unr = phi i64 [ %44, %for.body.lr.ph.split ], [ 0, %for.body.lr.ph ]
|
||||
br label %for.body
|
||||
|
||||
; Main loop: eight back-to-back circ.ldd + vdmacs.s1 steps per iteration, each
; circ.ldd consuming the pointer returned by the previous one; the counter
; advances by 8 per trip and is compared against %sub.
for.body: ; preds = %for.body, %for.body.lr.ph.split.split
|
||||
%46 = phi i64 [ %.unr, %for.body.lr.ph.split.split ], [ %78, %for.body ]
|
||||
%pvar6.09 = phi i64* [ %pvar6.09.unr62, %for.body.lr.ph.split.split ], [ %scevgep71, %for.body ]
|
||||
%var8.0.in8 = phi i8* [ %var8.0.in8.unr63, %for.body.lr.ph.split.split ], [ %75, %for.body ]
|
||||
%i.07 = phi i32 [ %i.07.unr64, %for.body.lr.ph.split.split ], [ %inc.7, %for.body ]
|
||||
%47 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8, i8* %3, i32 %or, i32 -8)
|
||||
%48 = load i64, i64* %pvar6.09, align 8, !tbaa !1
|
||||
%inc = add nsw i32 %i.07, 1
|
||||
%49 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%50 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %46, i64 %48, i64 %49)
|
||||
%51 = call i8* @llvm.hexagon.circ.ldd(i8* %47, i8* %3, i32 %or, i32 -8)
|
||||
%scevgep = getelementptr i64, i64* %pvar6.09, i32 1
|
||||
%52 = load i64, i64* %scevgep, align 8, !tbaa !1
|
||||
%inc.1 = add nsw i32 %inc, 1
|
||||
%53 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%54 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %50, i64 %52, i64 %53)
|
||||
%55 = call i8* @llvm.hexagon.circ.ldd(i8* %51, i8* %3, i32 %or, i32 -8)
|
||||
%scevgep65 = getelementptr i64, i64* %scevgep, i32 1
|
||||
%56 = load i64, i64* %scevgep65, align 8, !tbaa !1
|
||||
%inc.2 = add nsw i32 %inc.1, 1
|
||||
%57 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%58 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %54, i64 %56, i64 %57)
|
||||
%59 = call i8* @llvm.hexagon.circ.ldd(i8* %55, i8* %3, i32 %or, i32 -8)
|
||||
%scevgep66 = getelementptr i64, i64* %scevgep65, i32 1
|
||||
%60 = load i64, i64* %scevgep66, align 8, !tbaa !1
|
||||
%inc.3 = add nsw i32 %inc.2, 1
|
||||
%61 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%62 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %58, i64 %60, i64 %61)
|
||||
%63 = call i8* @llvm.hexagon.circ.ldd(i8* %59, i8* %3, i32 %or, i32 -8)
|
||||
%scevgep67 = getelementptr i64, i64* %scevgep66, i32 1
|
||||
%64 = load i64, i64* %scevgep67, align 8, !tbaa !1
|
||||
%inc.4 = add nsw i32 %inc.3, 1
|
||||
%65 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%66 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %62, i64 %64, i64 %65)
|
||||
%67 = call i8* @llvm.hexagon.circ.ldd(i8* %63, i8* %3, i32 %or, i32 -8)
|
||||
%scevgep68 = getelementptr i64, i64* %scevgep67, i32 1
|
||||
%68 = load i64, i64* %scevgep68, align 8, !tbaa !1
|
||||
%inc.5 = add nsw i32 %inc.4, 1
|
||||
%69 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%70 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %66, i64 %68, i64 %69)
|
||||
%71 = call i8* @llvm.hexagon.circ.ldd(i8* %67, i8* %3, i32 %or, i32 -8)
|
||||
%scevgep69 = getelementptr i64, i64* %scevgep68, i32 1
|
||||
%72 = load i64, i64* %scevgep69, align 8, !tbaa !1
|
||||
%inc.6 = add nsw i32 %inc.5, 1
|
||||
%73 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%74 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %70, i64 %72, i64 %73)
|
||||
%75 = call i8* @llvm.hexagon.circ.ldd(i8* %71, i8* %3, i32 %or, i32 -8)
|
||||
%scevgep70 = getelementptr i64, i64* %scevgep69, i32 1
|
||||
%76 = load i64, i64* %scevgep70, align 8, !tbaa !1
|
||||
%inc.7 = add nsw i32 %inc.6, 1
|
||||
%77 = load i64, i64* %var4, align 8, !tbaa !1
|
||||
%78 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %74, i64 %76, i64 %77)
|
||||
%cmp.7 = icmp slt i32 %inc.7, %sub
|
||||
%scevgep71 = getelementptr i64, i64* %scevgep70, i32 1
|
||||
br i1 %cmp.7, label %for.body, label %for.end.loopexit.unr-lcssa
|
||||
|
||||
for.end.loopexit.unr-lcssa: ; preds = %for.body
|
||||
%.lcssa12.ph = phi i64 [ %78, %for.body ]
|
||||
br label %for.end.loopexit
|
||||
|
||||
for.end.loopexit: ; preds = %for.end.loopexit.unr-lcssa, %for.body.lr.ph.split
|
||||
%.lcssa12 = phi i64 [ %44, %for.body.lr.ph.split ], [ %.lcssa12.ph, %for.end.loopexit.unr-lcssa ]
|
||||
br label %for.end
|
||||
|
||||
; Common exit: pack the final accumulator (from the entry block when the loop
; was skipped, otherwise from the loop exits) and return it.
for.end: ; preds = %for.end.loopexit, %entry
|
||||
%.lcssa = phi i64 [ %6, %entry ], [ %.lcssa12, %for.end.loopexit ]
|
||||
%79 = call i32 @llvm.hexagon.S2.vrndpackwhs(i64 %.lcssa)
|
||||
ret i32 %79
|
||||
}
|
||||
|
||||
declare i64 @llvm.hexagon.M2.vdmacs.s1(i64, i64, i64) nounwind readnone
|
||||
|
||||
declare i32 @llvm.hexagon.S2.vrndpackwhs(i64) nounwind readnone
|
||||
|
||||
!0 = !{!"long long", !1}
|
||||
!1 = !{!"omnipotent char", !2}
|
||||
!2 = !{!"Simple C/C++ TBAA"}
|
18
test/CodeGen/Hexagon/circ_ldw.ll
Normal file
18
test/CodeGen/Hexagon/circ_ldw.ll
Normal file
@ -0,0 +1,18 @@
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
|
||||
; CHECK: r{{[0-9]*}} = memw(r{{[0-9]*.}}++{{.}}#-4:circ(m0))
|
||||
|
||||
|
||||
%union.vect64 = type { i64 }
|
||||
%union.vect32 = type { i32 }
|
||||
|
||||
; Circular word load with a compile-time-constant third argument.
; Calls @llvm.hexagon.circ.ldw with %pLut (as i8*), a local i32 slot
; (%vLutNext, as i8*), the constant 83886144 (0x05000040) and an increment of
; -4, and returns the resulting pointer cast to i32*. The parameters %pRx,
; %pOut, %dc.coerce, %shift and %numSamples are not used by the body. The
; file-level FileCheck pattern expects a memw load with "#-4:circ(m0)".
define i32* @HallowedBeThyName(%union.vect64* nocapture %pRx, %union.vect32* %pLut, %union.vect64* nocapture %pOut, i64 %dc.coerce, i32 %shift, i32 %numSamples) nounwind {
|
||||
entry:
|
||||
%vLutNext = alloca i32, align 4
|
||||
%0 = bitcast %union.vect32* %pLut to i8*
|
||||
%1 = bitcast i32* %vLutNext to i8*
|
||||
%2 = call i8* @llvm.hexagon.circ.ldw(i8* %0, i8* %1, i32 83886144, i32 -4)
|
||||
%3 = bitcast i8* %2 to i32*
|
||||
ret i32* %3
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.ldw(i8*, i8*, i32, i32) nounwind
|
108
test/CodeGen/Hexagon/circ_st.ll
Normal file
108
test/CodeGen/Hexagon/circ_st.ll
Normal file
@ -0,0 +1,108 @@
|
||||
; RUN: llc -march=hexagon -verify-machineinstrs=true < %s | FileCheck %s
|
||||
; Testing for these 5 variants of circular store:
|
||||
; Q6_circ_store_update_B(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; Q6_circ_store_update_D(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; Q6_circ_store_update_HL(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; Q6_circ_store_update_HH(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; Q6_circ_store_update_W(inputLR, pDelay, -1, nConvLength, 4);
|
||||
; producing these:
|
||||
; memb(r1++#-1:circ(m0)) = r3
|
||||
; memd(r1++#-8:circ(m0)) = r1:0
|
||||
; memh(r1++#-2:circ(m0)) = r3
|
||||
; memh(r1++#-2:circ(m0)) = r3.h
|
||||
; memw(r1++#-4:circ(m0)) = r0
|
||||
|
||||
; ModuleID = 'circ_st.i'
|
||||
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
|
||||
target triple = "hexagon"
|
||||
|
||||
; Circular byte store test.
; Calls @llvm.hexagon.circ.stb with the address of filtMemLR[filtMemIndex], the
; constant 0 as the stored value, (filtMemLen >> 1) | 33554432 (0x02000000) as
; the third argument and an increment of -1. The byte at the pointer returned
; by the intrinsic is then loaded and returned. The FileCheck pattern below
; expects a memb instruction with a "#-1:circ(mN)" operand.
define zeroext i8 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr2 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%or = or i32 %shr2, 33554432
|
||||
; CHECK: memb(r{{[0-9]*}}{{.}}++{{.}}#-1:circ(m{{[0-1]}}))
|
||||
%1 = tail call i8* @llvm.hexagon.circ.stb(i8* %0, i32 0, i32 %or, i32 -1)
|
||||
%2 = load i8, i8* %1, align 1, !tbaa !0
|
||||
ret i8 %2
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.stb(i8*, i32, i32, i32) nounwind
|
||||
|
||||
; Circular doubleword store test.
; Calls @llvm.hexagon.circ.std with the address of filtMemLR[filtMemIndex], an
; undef i64 as the stored value, ((filtMemLen >> 1) << 3) | 83886080
; (0x05000000) as the third argument and an increment of -8. The pointer
; returned by the intrinsic is then read as an i64 and returned. The FileCheck
; pattern below expects a memd instruction with a "#-8:circ(mN)" operand.
define i64 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%shl = shl nuw nsw i32 %shr1, 3
|
||||
%or = or i32 %shl, 83886080
|
||||
; CHECK: memd(r{{[0-9]*}}{{.}}++{{.}}#-8:circ(m{{[0-1]}}))
|
||||
%1 = tail call i8* @llvm.hexagon.circ.std(i8* %0, i64 undef, i32 %or, i32 -8)
|
||||
%2 = bitcast i8* %1 to i64*
|
||||
%3 = load i64, i64* %2, align 8, !tbaa !0
|
||||
ret i64 %3
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.std(i8*, i64, i32, i32) nounwind
|
||||
|
||||
; Circular halfword store test.
; Calls @llvm.hexagon.circ.sth with the address of filtMemLR[filtMemIndex], the
; constant 0 as the stored value, (filtMemLen & 65534) | 50331648 (0x03000000)
; as the third argument and an increment of -2. The pointer returned by the
; intrinsic is then read as an i16 and returned. The FileCheck pattern below
; expects a memh instruction with a "#-2:circ(mN)" operand.
define signext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr2 = and i32 %conv, 65534
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%or = or i32 %shr2, 50331648
|
||||
; CHECK: memh(r{{[0-9]*}}{{.}}++{{.}}#-2:circ(m{{[0-1]}}))
|
||||
%1 = tail call i8* @llvm.hexagon.circ.sth(i8* %0, i32 0, i32 %or, i32 -2)
|
||||
%2 = bitcast i8* %1 to i16*
|
||||
%3 = load i16, i16* %2, align 2, !tbaa !2
|
||||
ret i16 %3
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.sth(i8*, i32, i32, i32) nounwind
|
||||
|
||||
; Circular high-halfword store test.
; Same shape as the sth test, but calls @llvm.hexagon.circ.sthhi: address of
; filtMemLR[filtMemIndex], constant 0 as the value operand,
; (filtMemLen & 65534) | 50331648 (0x03000000) as the third argument and an
; increment of -2. The FileCheck pattern below additionally requires the
; stored source register to carry the ".h" suffix.
define signext i16 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr2 = and i32 %conv, 65534
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%or = or i32 %shr2, 50331648
|
||||
; CHECK: memh(r{{[0-9]*}}{{.}}++{{.}}#-2:circ(m{{[0-1]}})){{ *}}={{ *}}r{{[0-9]*}}.h
|
||||
%1 = tail call i8* @llvm.hexagon.circ.sthhi(i8* %0, i32 0, i32 %or, i32 -2)
|
||||
%2 = bitcast i8* %1 to i16*
|
||||
%3 = load i16, i16* %2, align 2, !tbaa !2
|
||||
ret i16 %3
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.sthhi(i8*, i32, i32, i32) nounwind
|
||||
|
||||
; Circular word store test.
; Calls @llvm.hexagon.circ.stw with the address of filtMemLR[filtMemIndex], an
; undef i32 as the stored value, ((filtMemLen >> 1) << 2) | 67108864
; (0x04000000) as the third argument and an increment of -4. The pointer
; returned by the intrinsic is then read as an i32 and returned. The FileCheck
; pattern below expects a memw instruction with a "#-4:circ(mN)" operand.
define i32 @foo6(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
|
||||
entry:
|
||||
%conv = zext i16 %filtMemLen to i32
|
||||
%shr1 = lshr i32 %conv, 1
|
||||
%idxprom = sext i16 %filtMemIndex to i32
|
||||
%arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
|
||||
%0 = bitcast i16* %arrayidx to i8*
|
||||
%shl = shl nuw nsw i32 %shr1, 2
|
||||
%or = or i32 %shl, 67108864
|
||||
; CHECK: memw(r{{[0-9]*}}{{.}}++{{.}}#-4:circ(m{{[0-1]}}))
|
||||
%1 = tail call i8* @llvm.hexagon.circ.stw(i8* %0, i32 undef, i32 %or, i32 -4)
|
||||
%2 = bitcast i8* %1 to i32*
|
||||
%3 = load i32, i32* %2, align 4, !tbaa !3
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
declare i8* @llvm.hexagon.circ.stw(i8*, i32, i32, i32) nounwind
|
||||
|
||||
!0 = !{!"omnipotent char", !1}
|
||||
!1 = !{!"Simple C/C++ TBAA"}
|
||||
!2 = !{!"short", !0}
|
||||
!3 = !{!"int", !0}
|
Loading…
Reference in New Issue
Block a user