mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
Support unaligned load/store on more ARM targets
This patch matches GCC behavior: the code used to allow unaligned load/store on ARM only for v6+ Darwin; it will now allow unaligned load/store for v6+ Darwin as well as for v7+ on Linux and NaCl. The distinction is made because v6 doesn't guarantee support (but LLVM assumes that Apple controls hardware+kernel and therefore has conformant v6 CPUs), whereas v7 does provide this guarantee (and Linux/NaCl behave sanely). The patch keeps the -arm-strict-align command line option, and adds -arm-no-strict-align. They behave similarly to GCC's -mstrict-align and -mno-strict-align. I originally encountered this discrepancy in FastISel tests which expect unaligned load/store generation. Overall this should slightly improve performance in most cases because of reduced I$ pressure. llvm-svn: 182175
This commit is contained in:
parent
89d64471a3
commit
cbcaf8db77
@ -38,9 +38,24 @@ static cl::opt<bool>
|
||||
UseFusedMulOps("arm-use-mulops",
|
||||
cl::init(true), cl::Hidden);
|
||||
|
||||
static cl::opt<bool>
|
||||
StrictAlign("arm-strict-align", cl::Hidden,
|
||||
cl::desc("Disallow all unaligned memory accesses"));
|
||||
enum AlignMode {
|
||||
DefaultAlign,
|
||||
StrictAlign,
|
||||
NoStrictAlign
|
||||
};
|
||||
|
||||
static cl::opt<AlignMode>
|
||||
Align(cl::desc("Load/store alignment support"),
|
||||
cl::Hidden, cl::init(DefaultAlign),
|
||||
cl::values(
|
||||
clEnumValN(DefaultAlign, "arm-default-align",
|
||||
"Generate unaligned accesses only on hardware/OS "
|
||||
"combinations that are known to support them"),
|
||||
clEnumValN(StrictAlign, "arm-strict-align",
|
||||
"Disallow all unaligned memory accesses"),
|
||||
clEnumValN(NoStrictAlign, "arm-no-strict-align",
|
||||
"Allow unaligned memory accesses"),
|
||||
clEnumValEnd));
|
||||
|
||||
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
|
||||
const std::string &FS, const TargetOptions &Options)
|
||||
@ -162,10 +177,32 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
|
||||
if (!isThumb() || hasThumb2())
|
||||
PostRAScheduler = true;
|
||||
|
||||
// v6+ may or may not support unaligned mem access depending on the system
|
||||
// configuration.
|
||||
if (!StrictAlign && hasV6Ops() && isTargetDarwin())
|
||||
AllowsUnalignedMem = true;
|
||||
switch (Align) {
|
||||
case DefaultAlign:
|
||||
// Assume pre-ARMv6 doesn't support unaligned accesses.
|
||||
//
|
||||
// ARMv6 may or may not support unaligned accesses depending on the
|
||||
// SCTLR.U bit, which is architecture-specific. We assume ARMv6
|
||||
// Darwin targets support unaligned accesses, and others don't.
|
||||
//
|
||||
// ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
|
||||
// which raises an alignment fault on unaligned accesses. Linux
|
||||
// defaults this bit to 0 and handles it as a system-wide (not
|
||||
// per-process) setting. It is therefore safe to assume that ARMv7+
|
||||
// Linux targets support unaligned accesses. The same goes for NaCl.
|
||||
//
|
||||
// The above behavior is consistent with GCC.
|
||||
AllowsUnalignedMem = (
|
||||
(hasV7Ops() && (isTargetLinux() || isTargetNaCl())) ||
|
||||
(hasV6Ops() && isTargetDarwin()));
|
||||
break;
|
||||
case StrictAlign:
|
||||
AllowsUnalignedMem = false;
|
||||
break;
|
||||
case NoStrictAlign:
|
||||
AllowsUnalignedMem = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
|
||||
uint64_t Bits = getFeatureBits();
|
||||
|
@ -270,9 +270,8 @@ public:
|
||||
|
||||
bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
|
||||
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
|
||||
bool isTargetNaCl() const {
|
||||
return TargetTriple.getOS() == Triple::NaCl;
|
||||
}
|
||||
bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; }
|
||||
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
|
||||
bool isTargetELF() const { return !isTargetDarwin(); }
|
||||
|
||||
bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
|
||||
|
144
test/CodeGen/ARM/fast-isel-align.ll
Normal file
144
test/CodeGen/ARM/fast-isel-align.ll
Normal file
@ -0,0 +1,144 @@
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
|
||||
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
|
||||
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
|
||||
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=THUMB
|
||||
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
|
||||
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
|
||||
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl | FileCheck %s --check-prefix=ARM
|
||||
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
|
||||
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
|
||||
; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM
|
||||
; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB
|
||||
; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
|
||||
; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
|
||||
|
||||
; Check unaligned stores
|
||||
%struct.anon = type <{ float }>
|
||||
|
||||
@a = common global %struct.anon* null, align 4
|
||||
|
||||
define void @unaligned_store(float %x, float %y) nounwind {
|
||||
entry:
|
||||
; ARM: @unaligned_store
|
||||
; ARM: vmov r1, s0
|
||||
; ARM: str r1, [r0]
|
||||
|
||||
; THUMB: @unaligned_store
|
||||
; THUMB: vmov r1, s0
|
||||
; THUMB: str r1, [r0]
|
||||
|
||||
%add = fadd float %x, %y
|
||||
%0 = load %struct.anon** @a, align 4
|
||||
%x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0
|
||||
store float %add, float* %x1, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Doublewords require only word-alignment.
|
||||
; rdar://10528060
|
||||
%struct.anon.0 = type { double }
|
||||
|
||||
@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4
|
||||
|
||||
define void @word_aligned_f64_store(double %a, double %b) nounwind {
|
||||
entry:
|
||||
; ARM: @word_aligned_f64_store
|
||||
; THUMB: @word_aligned_f64_store
|
||||
%add = fadd double %a, %b
|
||||
store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4
|
||||
; ARM: vstr d16, [r0]
|
||||
; THUMB: vstr d16, [r0]
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check unaligned loads of floats
|
||||
%class.TAlignTest = type <{ i16, float }>
|
||||
|
||||
define zeroext i1 @unaligned_f32_load(%class.TAlignTest* %this) nounwind align 2 {
|
||||
entry:
|
||||
; ARM: @unaligned_f32_load
|
||||
; THUMB: @unaligned_f32_load
|
||||
%0 = alloca %class.TAlignTest*, align 4
|
||||
store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
|
||||
%1 = load %class.TAlignTest** %0
|
||||
%2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1
|
||||
%3 = load float* %2, align 1
|
||||
%4 = fcmp une float %3, 0.000000e+00
|
||||
; ARM: ldr r[[R:[0-9]+]], [r0, #2]
|
||||
; ARM: vmov s0, r[[R]]
|
||||
; ARM: vcmpe.f32 s0, #0
|
||||
; THUMB: ldr.w r[[R:[0-9]+]], [r0, #2]
|
||||
; THUMB: vmov s0, r[[R]]
|
||||
; THUMB: vcmpe.f32 s0, #0
|
||||
ret i1 %4
|
||||
}
|
||||
|
||||
define void @unaligned_i16_store(i16 %x, i16* %y) nounwind {
|
||||
entry:
|
||||
; ARM-STRICT-ALIGN: @unaligned_i16_store
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
|
||||
; THUMB-STRICT-ALIGN: @unaligned_i16_store
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
|
||||
store i16 %x, i16* %y, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
define i16 @unaligned_i16_load(i16* %x) nounwind {
|
||||
entry:
|
||||
; ARM-STRICT-ALIGN: @unaligned_i16_load
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
|
||||
; THUMB-STRICT-ALIGN: @unaligned_i16_load
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
|
||||
%0 = load i16* %x, align 1
|
||||
ret i16 %0
|
||||
}
|
||||
|
||||
define void @unaligned_i32_store(i32 %x, i32* %y) nounwind {
|
||||
entry:
|
||||
; ARM-STRICT-ALIGN: @unaligned_i32_store
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
|
||||
; THUMB-STRICT-ALIGN: @unaligned_i32_store
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
|
||||
store i32 %x, i32* %y, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @unaligned_i32_load(i32* %x) nounwind {
|
||||
entry:
|
||||
; ARM-STRICT-ALIGN: @unaligned_i32_load
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
|
||||
; THUMB-STRICT-ALIGN: @unaligned_i32_load
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
|
||||
%0 = load i32* %x, align 1
|
||||
ret i32 %0
|
||||
}
|
@ -1,7 +1,5 @@
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
|
||||
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
|
||||
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
|
||||
|
||||
; Very basic fast-isel functionality.
|
||||
define i32 @add(i32 %a, i32 %b) nounwind {
|
||||
@ -163,67 +161,6 @@ define void @test4() {
|
||||
; ARM: str r1, [r0]
|
||||
}
|
||||
|
||||
; Check unaligned stores
|
||||
%struct.anon = type <{ float }>
|
||||
|
||||
@a = common global %struct.anon* null, align 4
|
||||
|
||||
define void @unaligned_store(float %x, float %y) nounwind {
|
||||
entry:
|
||||
; ARM: @unaligned_store
|
||||
; ARM: vmov r1, s0
|
||||
; ARM: str r1, [r0]
|
||||
|
||||
; THUMB: @unaligned_store
|
||||
; THUMB: vmov r1, s0
|
||||
; THUMB: str r1, [r0]
|
||||
|
||||
%add = fadd float %x, %y
|
||||
%0 = load %struct.anon** @a, align 4
|
||||
%x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0
|
||||
store float %add, float* %x1, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Doublewords require only word-alignment.
|
||||
; rdar://10528060
|
||||
%struct.anon.0 = type { double }
|
||||
|
||||
@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4
|
||||
|
||||
define void @test5(double %a, double %b) nounwind {
|
||||
entry:
|
||||
; ARM: @test5
|
||||
; THUMB: @test5
|
||||
%add = fadd double %a, %b
|
||||
store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4
|
||||
; ARM: vstr d16, [r0]
|
||||
; THUMB: vstr d16, [r0]
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check unaligned loads of floats
|
||||
%class.TAlignTest = type <{ i16, float }>
|
||||
|
||||
define zeroext i1 @test6(%class.TAlignTest* %this) nounwind align 2 {
|
||||
entry:
|
||||
; ARM: @test6
|
||||
; THUMB: @test6
|
||||
%0 = alloca %class.TAlignTest*, align 4
|
||||
store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
|
||||
%1 = load %class.TAlignTest** %0
|
||||
%2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1
|
||||
%3 = load float* %2, align 1
|
||||
%4 = fcmp une float %3, 0.000000e+00
|
||||
; ARM: ldr r0, [r0, #2]
|
||||
; ARM: vmov s0, r0
|
||||
; ARM: vcmpe.f32 s0, #0
|
||||
; THUMB: ldr.w r0, [r0, #2]
|
||||
; THUMB: vmov s0, r0
|
||||
; THUMB: vcmpe.f32 s0, #0
|
||||
ret i1 %4
|
||||
}
|
||||
|
||||
; ARM: @urem_fold
|
||||
; THUMB: @urem_fold
|
||||
; ARM: and r0, r0, #31
|
||||
@ -233,10 +170,10 @@ define i32 @urem_fold(i32 %a) nounwind {
|
||||
ret i32 %rem
|
||||
}
|
||||
|
||||
define i32 @test7() noreturn nounwind {
|
||||
define i32 @trap_intrinsic() noreturn nounwind {
|
||||
entry:
|
||||
; ARM: @test7
|
||||
; THUMB: @test7
|
||||
; ARM: @trap_intrinsic
|
||||
; THUMB: @trap_intrinsic
|
||||
; ARM: trap
|
||||
; THUMB: trap
|
||||
tail call void @llvm.trap( )
|
||||
@ -244,67 +181,3 @@ entry:
|
||||
}
|
||||
|
||||
declare void @llvm.trap() nounwind
|
||||
|
||||
define void @unaligned_i16_store(i16 %x, i16* %y) nounwind {
|
||||
entry:
|
||||
; ARM-STRICT-ALIGN: @unaligned_i16_store
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
|
||||
; THUMB-STRICT-ALIGN: @unaligned_i16_store
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
|
||||
store i16 %x, i16* %y, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
define i16 @unaligned_i16_load(i16* %x) nounwind {
|
||||
entry:
|
||||
; ARM-STRICT-ALIGN: @unaligned_i16_load
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
|
||||
; THUMB-STRICT-ALIGN: @unaligned_i16_load
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
|
||||
%0 = load i16* %x, align 1
|
||||
ret i16 %0
|
||||
}
|
||||
|
||||
define void @unaligned_i32_store(i32 %x, i32* %y) nounwind {
|
||||
entry:
|
||||
; ARM-STRICT-ALIGN: @unaligned_i32_store
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
; ARM-STRICT-ALIGN: strb
|
||||
|
||||
; THUMB-STRICT-ALIGN: @unaligned_i32_store
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
; THUMB-STRICT-ALIGN: strb
|
||||
|
||||
store i32 %x, i32* %y, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @unaligned_i32_load(i32* %x) nounwind {
|
||||
entry:
|
||||
; ARM-STRICT-ALIGN: @unaligned_i32_load
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
; ARM-STRICT-ALIGN: ldrb
|
||||
|
||||
; THUMB-STRICT-ALIGN: @unaligned_i32_load
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
; THUMB-STRICT-ALIGN: ldrb
|
||||
|
||||
%0 = load i32* %x, align 1
|
||||
ret i32 %0
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user