llvm-mirror/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
Commit 71f437f406 by Craig Topper: [LegalizeTypes] Improve ExpandIntRes_XMULO codegen.
The code previously used two BUILD_PAIRs to concatenate the two UMULO
results with 0s in the lower bits to match the original VT. Then it created
an ADD and a UADDO with the original bit width. Both of those operations
then need to be expanded, since they have illegal types.

Since we put 0s in the lower bits before the ADD, the lower half of the
ADD result will be 0, so the lower half of the UADDO result is
determined solely by the other operand. Since the UADDO needs to
be split in half anyway, we don't really need an operation for the lower
bits. Unfortunately, type legalization doesn't see that, and we end up
creating something more complicated that DAG combine or
lowering can't always recover.
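
A small standalone sketch of that argument (illustration only, not the
LegalizeTypes code; a 64-bit add stands in for the wide ADD/UADDO and
32-bit halves stand in for the legal type): if one addend has an all-zero
lower half, the lower half of the sum is just the other operand's lower
half and the lower-half add can never carry, so a narrow add plus a
narrow carry check on the upper halves reproduces the wide result.

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      /* Hypothetical values: c is the "other operand", hi_only has a zero low half. */
      uint64_t c = 0x123456789abcdef0ULL;
      uint32_t hi = 0xdeadbeefU;
      uint64_t hi_only = (uint64_t)hi << 32;        /* BUILD_PAIR(hi, 0) analogue   */

      uint64_t wide_sum = c + hi_only;              /* the wide ADD/UADDO           */
      int wide_carry = wide_sum < c;                /* carry out of the wide add    */

      uint32_t narrow_sum = (uint32_t)(c >> 32) + hi;       /* narrow ADD           */
      int narrow_carry = narrow_sum < (uint32_t)(c >> 32);  /* narrow UADDO carry   */

      /* Low half untouched; upper half and carry fully reproduced narrowly. */
      printf("%d %d %d\n",
             (uint32_t)wide_sum == (uint32_t)c,
             (uint32_t)(wide_sum >> 32) == narrow_sum,
             wide_carry == narrow_carry);
      return 0;
    }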

This patch directly generates the narrower ADD and UADDO to avoid
needing to legalize them. Now only the MUL is done on the original
type.
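
For reference, here is a C sketch of the overflow computation visible in
the PPC64 output below (names such as umulo128_sketch and mulhi64 are
purely illustrative, and this mirrors the generated code rather than the
DAG-level implementation). Only the 64x64->128 multiply touches the wide
type; the overflow flag is built entirely from 64-bit pieces: both high
halves nonzero, either cross product not fitting in 64 bits, or a carry
out of the final high-half add.

    #include <stdint.h>

    /* Packed return value, matching the { i128, i8 } aggregate in the test. */
    typedef struct { unsigned __int128 val; uint8_t ovf; } U128Ovf;

    /* High 64 bits of a 64x64 product (mulhdu on PPC64). Requires the
     * Clang/GCC __int128 extension. */
    static uint64_t mulhi64(uint64_t a, uint64_t b) {
      return (uint64_t)(((unsigned __int128)a * b) >> 64);
    }

    U128Ovf umulo128_sketch(uint64_t l_hi, uint64_t l_lo,
                            uint64_t r_hi, uint64_t r_lo) {
      uint64_t lolo_hi = mulhi64(l_lo, r_lo);          /* hi(l_lo * r_lo)           */
      /* Low halves of the cross terms. If this add wraps, both high halves
       * are nonzero and the first overflow test below already fires. */
      uint64_t cross   = l_hi * r_lo + r_hi * l_lo;
      uint64_t res_hi  = lolo_hi + cross;              /* the narrow ADD + UADDO    */
      uint64_t res_lo  = l_lo * r_lo;

      int ovf = (l_hi != 0 && r_hi != 0)      /* hi*hi term lands at bit 128+      */
              | (mulhi64(l_hi, r_lo) != 0)    /* cross term spills past bit 127    */
              | (mulhi64(r_hi, l_lo) != 0)
              | (res_hi < lolo_hi);           /* carry out of the high-half add    */

      U128Ovf out = { ((unsigned __int128)res_hi << 64) | res_lo, (uint8_t)ovf };
      return out;
    }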

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D97440
2021-03-01 09:54:32 -08:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC64
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC32
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; PPC64-LABEL: muloti_test:
; PPC64: # %bb.0: # %start
; PPC64-NEXT: addic 8, 5, -1
; PPC64-NEXT: mulhdu 9, 5, 4
; PPC64-NEXT: mulld 10, 5, 4
; PPC64-NEXT: subfe 5, 8, 5
; PPC64-NEXT: mulld 8, 3, 6
; PPC64-NEXT: add 8, 8, 10
; PPC64-NEXT: addic 10, 3, -1
; PPC64-NEXT: mulhdu 7, 3, 6
; PPC64-NEXT: subfe 3, 10, 3
; PPC64-NEXT: and 5, 3, 5
; PPC64-NEXT: addic 3, 7, -1
; PPC64-NEXT: subfe 7, 3, 7
; PPC64-NEXT: or 5, 5, 7
; PPC64-NEXT: mulhdu 10, 4, 6
; PPC64-NEXT: addic 7, 9, -1
; PPC64-NEXT: add 3, 10, 8
; PPC64-NEXT: subfe 7, 7, 9
; PPC64-NEXT: or 5, 5, 7
; PPC64-NEXT: subc 7, 3, 10
; PPC64-NEXT: subfe 7, 3, 3
; PPC64-NEXT: neg 7, 7
; PPC64-NEXT: or 5, 5, 7
; PPC64-NEXT: mulld 4, 4, 6
; PPC64-NEXT: blr
;
; PPC32-LABEL: muloti_test:
; PPC32: # %bb.0: # %start
; PPC32-NEXT: mflr 0
; PPC32-NEXT: stw 0, 4(1)
; PPC32-NEXT: stwu 1, -64(1)
; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill
; PPC32-NEXT: mfcr 12
; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill
; PPC32-NEXT: mr 26, 7
; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill
; PPC32-NEXT: mr 28, 4
; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill
; PPC32-NEXT: mr 29, 8
; PPC32-NEXT: mr 24, 3
; PPC32-NEXT: li 3, 0
; PPC32-NEXT: li 4, 0
; PPC32-NEXT: li 7, 0
; PPC32-NEXT: li 8, 0
; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill
; PPC32-NEXT: mr 23, 5
; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill
; PPC32-NEXT: mr 25, 9
; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill
; PPC32-NEXT: mr 27, 6
; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill
; PPC32-NEXT: mr 30, 10
; PPC32-NEXT: stw 12, 24(1)
; PPC32-NEXT: bl __multi3
; PPC32-NEXT: mulhwu. 9, 26, 27
; PPC32-NEXT: mfcr 9 # cr0
; PPC32-NEXT: cmpwi 2, 26, 0
; PPC32-NEXT: stw 9, 20(1)
; PPC32-NEXT: cmpwi 3, 23, 0
; PPC32-NEXT: crnor 12, 14, 10
; PPC32-NEXT: cmpwi 4, 24, 0
; PPC32-NEXT: mulhwu. 9, 23, 29
; PPC32-NEXT: mcrf 5, 0
; PPC32-NEXT: cmpwi 1, 25, 0
; PPC32-NEXT: crnor 4, 6, 18
; PPC32-NEXT: mulhwu. 9, 24, 30
; PPC32-NEXT: mcrf 6, 0
; PPC32-NEXT: mulhwu. 0, 25, 28
; PPC32-NEXT: mcrf 7, 0
; PPC32-NEXT: or. 0, 28, 24
; PPC32-NEXT: mcrf 2, 0
; PPC32-NEXT: or. 0, 29, 26
; PPC32-NEXT: crnor 5, 2, 10
; PPC32-NEXT: mullw 10, 26, 27
; PPC32-NEXT: lwz 26, 20(1)
; PPC32-NEXT: mullw 9, 23, 29
; PPC32-NEXT: add 9, 10, 9
; PPC32-NEXT: mtcrf 128, 26 # cr0
; PPC32-NEXT: crorc 6, 12, 2
; PPC32-NEXT: crorc 20, 6, 22
; PPC32-NEXT: mulhwu 7, 29, 27
; PPC32-NEXT: add 9, 7, 9
; PPC32-NEXT: cmplw 9, 7
; PPC32-NEXT: crorc 21, 4, 26
; PPC32-NEXT: cror 20, 20, 0
; PPC32-NEXT: crorc 21, 21, 30
; PPC32-NEXT: mullw 11, 25, 28
; PPC32-NEXT: mullw 12, 24, 30
; PPC32-NEXT: add 10, 12, 11
; PPC32-NEXT: lwz 12, 24(1)
; PPC32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload
; PPC32-NEXT: mulhwu 8, 28, 30
; PPC32-NEXT: add 10, 8, 10
; PPC32-NEXT: cmplw 10, 8
; PPC32-NEXT: cror 21, 21, 0
; PPC32-NEXT: cror 21, 5, 21
; PPC32-NEXT: cror 20, 21, 20
; PPC32-NEXT: mullw 0, 29, 27
; PPC32-NEXT: mtcrf 32, 12 # cr2
; PPC32-NEXT: mtcrf 16, 12 # cr3
; PPC32-NEXT: mtcrf 8, 12 # cr4
; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
; PPC32-NEXT: mullw 7, 28, 30
; PPC32-NEXT: addc 7, 7, 0
; PPC32-NEXT: adde 11, 10, 9
; PPC32-NEXT: addc 9, 4, 7
; PPC32-NEXT: adde 8, 3, 11
; PPC32-NEXT: cmplw 6, 9, 4
; PPC32-NEXT: cmplw 8, 3
; PPC32-NEXT: crand 22, 2, 24
; PPC32-NEXT: crandc 23, 0, 2
; PPC32-NEXT: cror 22, 22, 23
; PPC32-NEXT: crnor 20, 20, 22
; PPC32-NEXT: li 3, 1
; PPC32-NEXT: bc 12, 20, .LBB0_2
; PPC32-NEXT: # %bb.1: # %start
; PPC32-NEXT: ori 7, 3, 0
; PPC32-NEXT: b .LBB0_3
; PPC32-NEXT: .LBB0_2: # %start
; PPC32-NEXT: li 7, 0
; PPC32-NEXT: .LBB0_3: # %start
; PPC32-NEXT: mr 3, 8
; PPC32-NEXT: mr 4, 9
; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
; PPC32-NEXT: lwz 28, 48(1) # 4-byte Folded Reload
; PPC32-NEXT: lwz 27, 44(1) # 4-byte Folded Reload
; PPC32-NEXT: lwz 25, 36(1) # 4-byte Folded Reload
; PPC32-NEXT: lwz 24, 32(1) # 4-byte Folded Reload
; PPC32-NEXT: lwz 23, 28(1) # 4-byte Folded Reload
; PPC32-NEXT: lwz 0, 68(1)
; PPC32-NEXT: addi 1, 1, 64
; PPC32-NEXT: mtlr 0
; PPC32-NEXT: blr
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
%2 = extractvalue { i128, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i128, i8 } undef, i128 %1, 0
%5 = insertvalue { i128, i8 } %4, i8 %3, 1
ret { i128, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }
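
As a usage note, a rough C analogue of the IR above (assuming the
Clang/GCC __int128 extension and the type-generic __builtin_mul_overflow;
the IR a compiler actually emits for this may differ in shape):

    #include <stdint.h>

    struct muloti_result { unsigned __int128 value; uint8_t overflow; };

    /* Roughly what muloti_test computes: the truncated 128-bit product plus
     * a flag that is 1 when l * r does not fit in 128 bits. */
    struct muloti_result muloti(unsigned __int128 l, unsigned __int128 r) {
      struct muloti_result res;
      res.overflow = (uint8_t)__builtin_mul_overflow(l, r, &res.value);
      return res;
    }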