mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
d8e91e816b
As Wei Mi is reporting in post-commit review https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20201116/853479.html teaching -reassociate about add-like-or's (70472f3) results in breaking apart load widening patterns, and reassociating them. For now, simply exclude any such `or` that appears to be a root of load widening idiom from the or->add transformation. Note that the heuristic is greedy, it doesn't ensure that loads can *actually* be widened into a single load.
154 lines
5.1 KiB
LLVM
154 lines
5.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -reassociate -S | FileCheck %s

; Basic pattern where two contiguous i8 loads form a wider i16 load.
;
; The byte at %ptr+1 is zero-extended and shifted into bits 8..15, the byte at
; %ptr is zero-extended into bits 0..7, and the halves are merged with `or`.
; The expected output keeps that `or` untouched (it is not rewritten into an
; `add` and reassociated with the trailing `add 42`).
define i16 @p0_i8_i8_i16(i8* %ptr) {
; CHECK-LABEL: @p0_i8_i8_i16(
; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[I2:%.*]] = load i8, i8* [[I]], align 1
; CHECK-NEXT:    [[I3:%.*]] = zext i8 [[I2]] to i16
; CHECK-NEXT:    [[I4:%.*]] = shl i16 [[I3]], 8
; CHECK-NEXT:    [[I5:%.*]] = load i8, i8* [[PTR]], align 1
; CHECK-NEXT:    [[I6:%.*]] = zext i8 [[I5]] to i16
; CHECK-NEXT:    [[I7:%.*]] = or i16 [[I4]], [[I6]]
; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], 42
; CHECK-NEXT:    ret i16 [[I8]]
;
  ; High half: byte at offset 1, moved into bits 8..15.
  %i = getelementptr inbounds i8, i8* %ptr, i64 1
  %i2 = load i8, i8* %i
  %i3 = zext i8 %i2 to i16
  %i4 = shl i16 %i3, 8
  ; Low half: byte at offset 0, left in bits 0..7.
  %i5 = load i8, i8* %ptr
  %i6 = zext i8 %i5 to i16
  ; Root of the load-widening-like idiom, followed by an unrelated add.
  %i7 = or i16 %i4, %i6
  %i8 = add i16 %i7, 42
  ret i16 %i8
}

; Basic pattern where two contiguous i8 loads form a wider i16 load, with
; swapped endianness.
;
; Here the byte at %ptr supplies bits 8..15 and the byte at %ptr+1 supplies
; bits 0..7. As in the non-swapped case, the expected output keeps the merging
; `or` untouched.
define i16 @p1_i8_i8_i16_swapped(i8* %ptr) {
; CHECK-LABEL: @p1_i8_i8_i16_swapped(
; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[PTR:%.*]], align 1
; CHECK-NEXT:    [[I2:%.*]] = zext i8 [[I]] to i16
; CHECK-NEXT:    [[I3:%.*]] = shl i16 [[I2]], 8
; CHECK-NEXT:    [[I4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 1
; CHECK-NEXT:    [[I5:%.*]] = load i8, i8* [[I4]], align 1
; CHECK-NEXT:    [[I6:%.*]] = zext i8 [[I5]] to i16
; CHECK-NEXT:    [[I7:%.*]] = or i16 [[I3]], [[I6]]
; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], 42
; CHECK-NEXT:    ret i16 [[I8]]
;
  ; High half: byte at offset 0, moved into bits 8..15.
  %i = load i8, i8* %ptr
  %i2 = zext i8 %i to i16
  %i3 = shl i16 %i2, 8
  ; Low half: byte at offset 1, left in bits 0..7.
  %i4 = getelementptr inbounds i8, i8* %ptr, i64 1
  %i5 = load i8, i8* %i4
  %i6 = zext i8 %i5 to i16
  ; Root of the load-widening-like idiom, followed by an unrelated add.
  %i7 = or i16 %i3, %i6
  %i8 = add i16 %i7, 42
  ret i16 %i8
}

; Loads are spaced out by a bit, but we don't check for that.
;
; Same shape as the basic two-byte pattern, except the high byte is shifted
; left by 9 instead of 8, so the two pieces do not tile an i16 exactly.
; The expected output still keeps the `or` untouched.
define i16 @p2(i8* %ptr) {
; CHECK-LABEL: @p2(
; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[I2:%.*]] = load i8, i8* [[I]], align 1
; CHECK-NEXT:    [[I3:%.*]] = zext i8 [[I2]] to i16
; CHECK-NEXT:    [[I4:%.*]] = shl i16 [[I3]], 9
; CHECK-NEXT:    [[I5:%.*]] = load i8, i8* [[PTR]], align 1
; CHECK-NEXT:    [[I6:%.*]] = zext i8 [[I5]] to i16
; CHECK-NEXT:    [[I7:%.*]] = or i16 [[I4]], [[I6]]
; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], 42
; CHECK-NEXT:    ret i16 [[I8]]
;
  %i = getelementptr inbounds i8, i8* %ptr, i64 1
  %i2 = load i8, i8* %i
  %i3 = zext i8 %i2 to i16
  %i4 = shl i16 %i3, 9 ; wrong shift amount
  %i5 = load i8, i8* %ptr
  %i6 = zext i8 %i5 to i16
  %i7 = or i16 %i4, %i6
  %i8 = add i16 %i7, 42
  ret i16 %i8
}

; Both bytes are the same, but we don't check for that.
;
; A single i8 load is zero-extended once; that value is used both shifted left
; by 8 (high half) and unshifted (low half) as the operands of the `or`.
; The expected output still keeps the `or` untouched.
define i16 @p3(i8* %ptr) {
; CHECK-LABEL: @p3(
; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[PTR:%.*]], align 1
; CHECK-NEXT:    [[I2:%.*]] = zext i8 [[I]] to i16
; CHECK-NEXT:    [[I3:%.*]] = shl i16 [[I2]], 8
; CHECK-NEXT:    [[I4:%.*]] = or i16 [[I3]], [[I2]]
; CHECK-NEXT:    [[I5:%.*]] = add i16 [[I4]], 42
; CHECK-NEXT:    ret i16 [[I5]]
;
  %i = load i8, i8* %ptr
  %i2 = zext i8 %i to i16
  %i3 = shl i16 %i2, 8
  ; Both operands derive from the same load (%i2).
  %i4 = or i16 %i3, %i2
  %i5 = add i16 %i4, 42
  ret i16 %i5
}

; ---------------------------------------------------------------------------- ;
; Negative tests: the `or` is not the root of a load-widening idiom, so it
; should still be transformed into an `add`.

; Low bits are not a load
;
; The high half is a shifted, zero-extended load, but the low half is the
; constant 42. The expected output contains no `or`: it has been turned into
; an add and merged with the trailing `add 42`, leaving a single `add 84`.
define i16 @n4(i8* %ptr) {
; CHECK-LABEL: @n4(
; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[PTR:%.*]], align 1
; CHECK-NEXT:    [[I2:%.*]] = zext i8 [[I]] to i16
; CHECK-NEXT:    [[I3:%.*]] = shl i16 [[I2]], 8
; CHECK-NEXT:    [[I5:%.*]] = add i16 [[I3]], 84
; CHECK-NEXT:    ret i16 [[I5]]
;
  %i = load i8, i8* %ptr
  %i2 = zext i8 %i to i16
  %i3 = shl i16 %i2, 8
  %i4 = or i16 %i3, 42 ; Second operand is bad
  %i5 = add i16 %i4, 42
  ret i16 %i5
}

; Low bits are not a load
;
; The high half is a shifted, zero-extended load, but the low half is the
; zero-extended function argument %lowbits rather than a load. The expected
; output contains no `or`: it is replaced by two `add` instructions.
define i16 @n5(i8* %ptr, i8 %lowbits) {
; CHECK-LABEL: @n5(
; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[PTR:%.*]], align 1
; CHECK-NEXT:    [[I2:%.*]] = zext i8 [[I]] to i16
; CHECK-NEXT:    [[I3:%.*]] = shl i16 [[I2]], 8
; CHECK-NEXT:    [[I4:%.*]] = zext i8 [[LOWBITS:%.*]] to i16
; CHECK-NEXT:    [[I5:%.*]] = add i16 [[I4]], 42
; CHECK-NEXT:    [[I6:%.*]] = add i16 [[I5]], [[I3]]
; CHECK-NEXT:    ret i16 [[I6]]
;
  %i = load i8, i8* %ptr
  %i2 = zext i8 %i to i16
  %i3 = shl i16 %i2, 8
  %i4 = zext i8 %lowbits to i16 ; base operand is bad
  %i5 = or i16 %i3, %i4
  %i6 = add i16 %i5, 42
  ret i16 %i6
}

; High bits are not a load
;
; The low half is a zero-extended load of the byte at %ptr, but the high half
; is `shl i16 42, 8`, a shift of a constant rather than of a loaded value.
; The expected output contains no `or`: it is replaced by two `add`
; instructions.
;
; NOTE(review): %highbits is never referenced in the body, and %i2 (the load
; from %ptr+1) has no users; the expected output has no load from that address.
; The IR is left exactly as-is so the autogenerated assertions stay valid.
define i16 @n6(i8* %ptr, i8 %highbits) {
; CHECK-LABEL: @n6(
; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[I4:%.*]] = shl i16 42, 8
; CHECK-NEXT:    [[I5:%.*]] = load i8, i8* [[PTR]], align 1
; CHECK-NEXT:    [[I6:%.*]] = zext i8 [[I5]] to i16
; CHECK-NEXT:    [[I7:%.*]] = add i16 [[I4]], 42
; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], [[I6]]
; CHECK-NEXT:    ret i16 [[I8]]
;
  %i = getelementptr inbounds i8, i8* %ptr, i64 1
  %i2 = load i8, i8* %i
  %i4 = shl i16 42, 8 ; base operand is bad
  %i5 = load i8, i8* %ptr
  %i6 = zext i8 %i5 to i16
  %i7 = or i16 %i4, %i6
  %i8 = add i16 %i7, 42
  ret i16 %i8
}