1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00
llvm-mirror/test/CodeGen/ARM/legalize-bitcast.ll
Mikael Holmen d2c3d58963 [LegalizeTypes] Bugfixes for big-endian targets when handling BITCASTs
Summary:
This fixes PR44135.

The special case when we promote a bitcast from a vector to an int
needs special handling when we are on a big-endian target.

Prior to this fix, for the added vec_to_int we see the following in the
SelectionDAG printouts

Type-legalized selection DAG: %bb.1 'foo:bb.1'
SelectionDAG has 9 nodes:
  t0: ch = EntryToken
        t2: v8i16,ch = CopyFromReg t0, Register:v8i16 %0
      t17: v4i32 = bitcast t2
    t23: i32 = extract_vector_elt t17, Constant:i32<3>
  t8: ch,glue = CopyToReg t0, Register:i32 $r0, t23
  t9: ch = ARMISD::RET_FLAG t8, Register:i32 $r0, t8:1

and I think here the extract_vector_elt is wrong and extracts the value
from the wrong index.

The program should return the 32 bits made up of the elements at
index 4 and 5 in the vec6 array, but with

    t23: i32 = extract_vector_elt t17, Constant:i32<3>

as far as I can tell, we will extract values that originally didn't even
exist in the vec6 vector.

If we would instead extract the element at index 2 we would get the wanted
values.

With this fix we insert a right shift after the bitcast in
DAGTypeLegalizer::PromoteIntRes_BITCAST which then gives us

Type-legalized selection DAG: %bb.1 'vec_to_int:bb.1'
SelectionDAG has 9 nodes:
  t0: ch = EntryToken
        t2: v8i16,ch = CopyFromReg t0, Register:v8i16 %0
      t23: v4i32 = bitcast t2
    t27: i32 = extract_vector_elt t23, Constant:i32<2>
  t8: ch,glue = CopyToReg t0, Register:i32 $r0, t27
  t9: ch = ARMISD::RET_FLAG t8, Register:i32 $r0, t8:1

So now we get

    t27: i32 = extract_vector_elt t23, Constant:i32<2>

which is what we want.

Similarly, the new int_to_vec testcase exposes a bug where we cast in the other
direction. Then we instead need to add a left shift before the bitcast on
big-endian targets for the bits in the input integer to end up at the expected
place in the vector.

Reviewers: bogner, spatel, craig.topper, t.p.northover, dmgreen, efriedma, SjoerdMeijer, samparker

Reviewed By: efriedma

Subscribers: eli.friedman, bjope, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70942
2019-12-10 11:22:35 +01:00

60 lines
1.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -mtriple=armebv7 -target-abi apcs -o - %s | FileCheck %s

; The test is compiled unoptimized (-O0) for the armebv7 triple (big-endian
; ARMv7) with the APCS ABI; the expected assembly in each function below is
; matched against that llc output by FileCheck.

; External <6 x i16> global; it is the source of the vector loaded in
; @vec_to_int.
@vec6_p = external global <6 x i16>
; Vector-to-integer bitcast test (PR44135): load a <6 x i16> vector, bitcast
; it to i96 and return the i96 truncated to i32.
; The intended result is the 32 bits made up of vector elements 4 and 5.
define i32 @vec_to_int() {
; CHECK-LABEL: vec_to_int:
; CHECK: @ %bb.0: @ %bb.0
; CHECK-NEXT: push {r4}
; CHECK-NEXT: sub sp, sp, #28
; CHECK-NEXT: movw r0, :lower16:vec6_p
; CHECK-NEXT: movt r0, :upper16:vec6_p
; CHECK-NEXT: vld1.8 {d16}, [r0]!
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: @ implicit-def: $d17
; CHECK-NEXT: vmov.32 d17[0], r0
; CHECK-NEXT: vrev32.16 d17, d17
; CHECK-NEXT: vrev16.8 d16, d16
; CHECK-NEXT: vmov.f64 d18, d16
; CHECK-NEXT: vmov.f64 d19, d17
; CHECK-NEXT: vstmia sp, {d18, d19} @ 16-byte Spill
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_1: @ %bb.1
; CHECK-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload
; CHECK-NEXT: vrev32.16 q9, q8
; CHECK-NEXT: @ kill: def $d19 killed $d19 killed $q9
; CHECK-NEXT: vmov.32 r0, d19[0]
; CHECK-NEXT: add sp, sp, #28
; CHECK-NEXT: pop {r4}
; CHECK-NEXT: bx lr
bb.0:
; Load all six i16 elements from the external global with alignment 1.
%vec6 = load <6 x i16>, <6 x i16>* @vec6_p, align 1
br label %bb.1
bb.1:
; The bitcast lives in a different basic block than the load; presumably this
; keeps the bitcast from being combined with the load during instruction
; selection, so the bitcast legalization itself is exercised (TODO confirm).
%0 = bitcast <6 x i16> %vec6 to i96
; Keep only the low 32 bits of the 96-bit integer as the return value.
%1 = trunc i96 %0 to i32
ret i32 %1
}
; Integer-to-vector bitcast test: bitcast the i80 argument to <5 x i16> and
; return element 0 of the resulting vector.
; The expected output shifts the input left (lsl #16, merged with the next
; word via orr ... lsr #16) before moving it into a vector register; this
; matches the left shift needed before the bitcast on big-endian targets so
; the integer's bits land in the expected vector lanes.
define i16 @int_to_vec(i80 %in) {
; CHECK-LABEL: int_to_vec:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: lsl r0, r0, #16
; CHECK-NEXT: orr r0, r0, r1, lsr #16
; CHECK-NEXT: @ implicit-def: $d16
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: @ implicit-def: $q9
; CHECK-NEXT: vmov.f64 d18, d16
; CHECK-NEXT: vrev32.16 q9, q9
; CHECK-NEXT: @ kill: def $d18 killed $d18 killed $q9
; CHECK-NEXT: vmov.u16 r0, d18[0]
; CHECK-NEXT: bx lr
; Reinterpret the 80-bit integer as five 16-bit lanes.
%vec = bitcast i80 %in to <5 x i16>
; Return lane 0. NOTE(review): on a big-endian target this should be the most
; significant 16 bits of %in, per the bitcast store/reload semantics -- confirm
; against the LLVM Language Reference.
%e0 = extractelement <5 x i16> %vec, i32 0
ret i16 %e0
}