1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
llvm-mirror/test/CodeGen/ARM/vector-promotion.ll
Kristof Beyls 8cf5d38fcf Don't conditionalize Neon instructions, even in IT blocks.
This has been deprecated since ARMARM v7-AR, release C.b, published back
in 2012.

This also removes test/CodeGen/Thumb2/ifcvt-neon.ll that originally was
introduced to check that conditionalization of Neon instructions did
happen when generating Thumb2. However, the test had evolved and was no
longer testing that. Rather than trying to adapt that test, this commit
introduces test/CodeGen/Thumb2/ifcvt-neon-deprecated.mir, since we can
now use the MIR framework to write nicer/more maintainable tests.

llvm-svn: 305998
2017-06-22 12:11:38 +00:00

404 lines
17 KiB
LLVM

; RUN: opt -codegenprepare -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon -S | FileCheck --check-prefix=IR-BOTH --check-prefix=IR-NORMAL %s
; RUN: opt -codegenprepare -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon -S -stress-cgp-store-extract | FileCheck --check-prefix=IR-BOTH --check-prefix=IR-STRESS %s
; RUN: llc -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon | FileCheck --check-prefix=ASM %s
; IR-BOTH-LABEL: @simpleOneInstructionPromotion
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 undef, i32 1>
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[VECTOR_OR]], i32 1
; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
; IR-BOTH-NEXT: ret
;
; Make sure we got rid of any expensive vmov.32 instructions.
; ASM-LABEL: simpleOneInstructionPromotion:
; ASM: vldr [[LOAD:d[0-9]+]], [r0]
; ASM-NEXT: vorr.i32 [[LOAD]], #0x1
; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1:32]
; ASM-NEXT: bx
define void @simpleOneInstructionPromotion(<2 x i32>* %addr1, i32* %dest) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = or i32 %extract, 1
store i32 %out, i32* %dest, align 4
ret void
}
; IR-BOTH-LABEL: @unsupportedInstructionForPromotion
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0
; IR-BOTH-NEXT: [[CMP:%[a-zA-Z_0-9-]+]] = icmp eq i32 [[EXTRACT]], %in2
; IR-BOTH-NEXT: store i1 [[CMP]], i1* %dest
; IR-BOTH-NEXT: ret
;
; ASM-LABEL: unsupportedInstructionForPromotion:
; ASM: vldr [[LOAD:d[0-9]+]], [r0]
; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
; ASM: bx
define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1* %dest) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 0
%out = icmp eq i32 %extract, %in2
store i1 %out, i1* %dest, align 4
ret void
}
; IR-BOTH-LABEL: @unsupportedChainInDifferentBBs
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0
; IR-BOTH-NEXT: br i1 %bool, label %bb2, label %end
; BB2
; IR-BOTH: [[OR:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
; IR-BOTH-NEXT: store i32 [[OR]], i32* %dest, align 4
; IR-BOTH: ret
;
; ASM-LABEL: unsupportedChainInDifferentBBs:
; ASM: vldr [[LOAD:d[0-9]+]], [r0]
; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
; ASM: bx
define void @unsupportedChainInDifferentBBs(<2 x i32>* %addr1, i32* %dest, i1 %bool) {
bb1:
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 0
br i1 %bool, label %bb2, label %end
bb2:
%out = or i32 %extract, 1
store i32 %out, i32* %dest, align 4
br label %end
end:
ret void
}
; IR-LABEL: @chainOfInstructionsToPromote
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[VECTOR_OR1:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 1, i32 undef>
; IR-BOTH-NEXT: [[VECTOR_OR2:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR1]], <i32 1, i32 undef>
; IR-BOTH-NEXT: [[VECTOR_OR3:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR2]], <i32 1, i32 undef>
; IR-BOTH-NEXT: [[VECTOR_OR4:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR3]], <i32 1, i32 undef>
; IR-BOTH-NEXT: [[VECTOR_OR5:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR4]], <i32 1, i32 undef>
; IR-BOTH-NEXT: [[VECTOR_OR6:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR5]], <i32 1, i32 undef>
; IR-BOTH-NEXT: [[VECTOR_OR7:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR6]], <i32 1, i32 undef>
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[VECTOR_OR7]], i32 0
; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
; IR-BOTH-NEXT: ret
;
; ASM-LABEL: chainOfInstructionsToPromote:
; ASM: vldr [[LOAD:d[0-9]+]], [r0]
; ASM-NOT: vmov.32 {{r[0-9]+}}, [[LOAD]]
; ASM: bx
define void @chainOfInstructionsToPromote(<2 x i32>* %addr1, i32* %dest) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 0
%out1 = or i32 %extract, 1
%out2 = or i32 %out1, 1
%out3 = or i32 %out2, 1
%out4 = or i32 %out3, 1
%out5 = or i32 %out4, 1
%out6 = or i32 %out5, 1
%out7 = or i32 %out6, 1
store i32 %out7, i32* %dest, align 4
ret void
}
; IR-BOTH-LABEL: @unsupportedMultiUses
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-BOTH-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
; IR-BOTH-NEXT: store i32 [[OR]], i32* %dest
; IR-BOTH-NEXT: ret i32 [[OR]]
;
; ASM-LABEL: unsupportedMultiUses:
; ASM: vldr [[LOAD:d[0-9]+]], [r0]
; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
; ASM: bx
define i32 @unsupportedMultiUses(<2 x i32>* %addr1, i32* %dest) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = or i32 %extract, 1
store i32 %out, i32* %dest, align 4
ret i32 %out
}
; Check that we promote we a splat constant when this is a division.
; The NORMAL mode does not promote anything as divisions are not legal.
; IR-BOTH-LABEL: @udivCase
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 [[EXTRACT]], 7
; Vector version:
; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = udiv <2 x i32> [[LOAD]], <i32 7, i32 7>
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
;
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @udivCase(<2 x i32>* %addr1, i32* %dest) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = udiv i32 %extract, 7
store i32 %out, i32* %dest, align 4
ret void
}
; IR-BOTH-LABEL: @uremCase
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = urem i32 [[EXTRACT]], 7
; Vector version:
; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = urem <2 x i32> [[LOAD]], <i32 7, i32 7>
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
;
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @uremCase(<2 x i32>* %addr1, i32* %dest) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = urem i32 %extract, 7
store i32 %out, i32* %dest, align 4
ret void
}
; IR-BOTH-LABEL: @sdivCase
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sdiv i32 [[EXTRACT]], 7
; Vector version:
; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = sdiv <2 x i32> [[LOAD]], <i32 7, i32 7>
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
;
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @sdivCase(<2 x i32>* %addr1, i32* %dest) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = sdiv i32 %extract, 7
store i32 %out, i32* %dest, align 4
ret void
}
; IR-BOTH-LABEL: @sremCase
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 [[EXTRACT]], 7
; Vector version:
; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = srem <2 x i32> [[LOAD]], <i32 7, i32 7>
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
;
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @sremCase(<2 x i32>* %addr1, i32* %dest) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = srem i32 %extract, 7
store i32 %out, i32* %dest, align 4
ret void
}
; IR-BOTH-LABEL: @fdivCase
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fdiv float [[EXTRACT]], 7.0
; Vector version:
; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = fdiv <2 x float> [[LOAD]], <float 7.000000e+00, float 7.000000e+00>
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
;
; IR-BOTH-NEXT: store float [[RES]], float* %dest
; IR-BOTH-NEXT: ret
define void @fdivCase(<2 x float>* %addr1, float* %dest) {
%in1 = load <2 x float>, <2 x float>* %addr1, align 8
%extract = extractelement <2 x float> %in1, i32 1
%out = fdiv float %extract, 7.0
store float %out, float* %dest, align 4
ret void
}
; IR-BOTH-LABEL: @fremCase
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem float [[EXTRACT]], 7.0
; Vector version:
; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem <2 x float> [[LOAD]], <float 7.000000e+00, float 7.000000e+00>
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
;
; IR-BOTH-NEXT: store float [[RES]], float* %dest
; IR-BOTH-NEXT: ret
define void @fremCase(<2 x float>* %addr1, float* %dest) {
%in1 = load <2 x float>, <2 x float>* %addr1, align 8
%extract = extractelement <2 x float> %in1, i32 1
%out = frem float %extract, 7.0
store float %out, float* %dest, align 4
ret void
}
; Check that we do not promote when we may introduce undefined behavior
; like division by zero.
; IR-BOTH-LABEL: @undefDivCase
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 7, [[EXTRACT]]
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @undefDivCase(<2 x i32>* %addr1, i32* %dest) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = udiv i32 7, %extract
store i32 %out, i32* %dest, align 4
ret void
}
; Check that we do not promote when we may introduce undefined behavior
; like division by zero.
; IR-BOTH-LABEL: @undefRemCase
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 7, [[EXTRACT]]
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @undefRemCase(<2 x i32>* %addr1, i32* %dest) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = srem i32 7, %extract
store i32 %out, i32* %dest, align 4
ret void
}
; Check that we use an undef mask for undefined behavior if the fast-math
; flag is set.
; IR-BOTH-LABEL: @undefConstantFRemCaseWithFastMath
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float [[EXTRACT]], 7.0
; Vector version:
; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem nnan <2 x float> [[LOAD]], <float undef, float 7.000000e+00>
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
;
; IR-BOTH-NEXT: store float [[RES]], float* %dest
; IR-BOTH-NEXT: ret
define void @undefConstantFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) {
%in1 = load <2 x float>, <2 x float>* %addr1, align 8
%extract = extractelement <2 x float> %in1, i32 1
%out = frem nnan float %extract, 7.0
store float %out, float* %dest, align 4
ret void
}
; Check that we use an undef mask for undefined behavior if the fast-math
; flag is set.
; IR-BOTH-LABEL: @undefVectorFRemCaseWithFastMath
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float 7.000000e+00, [[EXTRACT]]
; Vector version:
; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem nnan <2 x float> <float undef, float 7.000000e+00>, [[LOAD]]
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
;
; IR-BOTH-NEXT: store float [[RES]], float* %dest
; IR-BOTH-NEXT: ret
define void @undefVectorFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) {
%in1 = load <2 x float>, <2 x float>* %addr1, align 8
%extract = extractelement <2 x float> %in1, i32 1
%out = frem nnan float 7.0, %extract
store float %out, float* %dest, align 4
ret void
}
; Check that we are able to promote floating point value.
; This requires the STRESS mode, as floating point value are
; not promote on armv7.
; IR-BOTH-LABEL: @simpleOneInstructionPromotionFloat
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fadd float [[EXTRACT]], 1.0
; Vector version:
; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = fadd <2 x float> [[LOAD]], <float undef, float 1.000000e+00>
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
;
; IR-BOTH-NEXT: store float [[RES]], float* %dest
; IR-BOTH-NEXT: ret
define void @simpleOneInstructionPromotionFloat(<2 x float>* %addr1, float* %dest) {
%in1 = load <2 x float>, <2 x float>* %addr1, align 8
%extract = extractelement <2 x float> %in1, i32 1
%out = fadd float %extract, 1.0
store float %out, float* %dest, align 4
ret void
}
; Check that we correctly use a splat constant when we cannot
; determine at compile time the index of the extract.
; This requires the STRESS modes, as variable index are expensive
; to lower.
; IR-BOTH-LABEL: @simpleOneInstructionPromotionVariableIdx
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 %idx
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
; Vector version:
; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 1, i32 1>
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[OR]], i32 %idx
;
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %dest, i32 %idx) {
%in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 %idx
%out = or i32 %extract, 1
store i32 %out, i32* %dest, align 4
ret void
}
; Check a vector with more than 2 elements.
; This requires the STRESS mode because currently 'or v8i8' is not marked
; as legal or custom, althought the actual assembly is better if we were
; promoting it.
; IR-BOTH-LABEL: @simpleOneInstructionPromotion8x8
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <8 x i8>, <8 x i8>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i8 [[EXTRACT]], 1
; Vector version:
; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <8 x i8> [[LOAD]], <i8 undef, i8 1, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[OR]], i32 1
;
; IR-BOTH-NEXT: store i8 [[RES]], i8* %dest
; IR-BOTH-NEXT: ret
define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) {
%in1 = load <8 x i8>, <8 x i8>* %addr1, align 8
%extract = extractelement <8 x i8> %in1, i32 1
%out = or i8 %extract, 1
store i8 %out, i8* %dest, align 4
ret void
}
; Check that we optimized the sequence correctly when it can be
; lowered on a Q register.
; IR-BOTH-LABEL: @simpleOneInstructionPromotion
; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <4 x i32>, <4 x i32>* %addr1
; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <4 x i32> [[LOAD]], <i32 undef, i32 1, i32 undef, i32 undef>
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <4 x i32> [[VECTOR_OR]], i32 1
; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
; IR-BOTH-NEXT: ret
;
; Make sure we got rid of any expensive vmov.32 instructions.
; ASM-LABEL: simpleOneInstructionPromotion4x32:
; ASM: vld1.64 {[[LOAD:d[0-9]+]], d{{[0-9]+}}}, [r0]
; The Q register used here must be [[LOAD]] / 2, but we cannot express that.
; ASM-NEXT: vorr.i32 q{{[[0-9]+}}, #0x1
; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1]
; ASM-NEXT: bx
define void @simpleOneInstructionPromotion4x32(<4 x i32>* %addr1, i32* %dest) {
%in1 = load <4 x i32>, <4 x i32>* %addr1, align 8
%extract = extractelement <4 x i32> %in1, i32 1
%out = or i32 %extract, 1
store i32 %out, i32* %dest, align 1
ret void
}