llvm-mirror/test/CodeGen/AArch64/misched-fusion-aes.ll
David Green f56c09c87f [MachineScheduler] Update available queue on the first mop of a new cycle
If a resource can be held for multiple cycles in the schedule model,
then an instruction can be placed into the available queue and another
instruction scheduled, but the first will not be taken back out if the
two instructions hazard against each other. To fix this, make sure that
we update the available queue even on the first MOp of a cycle, pushing
available instructions back into the pending queue if they now conflict.

This happens with some downstream schedule models we have around MVE
instruction scheduling, where we use ResourceCycles=[2] to model the
instruction executing over two beats. Apparently the test changes here
are OK too.
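
As a rough illustration only (the resource and write names below are invented,
not the actual downstream MVE model), a ResourceCycles=[2] entry in a TableGen
schedule model looks something like this, declaring that the unit stays
occupied for two beats:

  // Sketch; in a real target these live alongside a SchedMachineModel definition.
  def MVEPipe      : ProcResource<1>;
  def WriteTwoBeat : SchedWrite;
  def : WriteRes<WriteTwoBeat, [MVEPipe]> {
    let Latency        = 2;
    let ResourceCycles = [2];  // MVEPipe is held for two consecutive cycles
  }

With a single MVEPipe unit held for two cycles, an instruction waiting on it
could previously sit in the available queue while the unit was still busy,
which is the conflict the pending-queue update described above addresses.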

Differential Revision: https://reviews.llvm.org/D76909
2020-06-09 19:13:53 +01:00

; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-aes,+crypto | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s
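
; Note: the first two RUN lines request AES fusion explicitly via +fuse-aes; the
; CPU-specific RUN lines presumably imply both the crypto instructions and the
; same fusion feature, which is why every RUN line is verified against the same
; set of check lines.
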
declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k)
declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %d)
declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d, <16 x i8> %k)
declare <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %d)

define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, <16 x i8> %e) {
%d0 = load <16 x i8>, <16 x i8>* %a0
%a1 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 1
%d1 = load <16 x i8>, <16 x i8>* %a1
%a2 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 2
%d2 = load <16 x i8>, <16 x i8>* %a2
%a3 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 3
%d3 = load <16 x i8>, <16 x i8>* %a3
%k0 = load <16 x i8>, <16 x i8>* %b0
%e00 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d0, <16 x i8> %k0)
%f00 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e00)
%e01 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d1, <16 x i8> %k0)
%f01 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e01)
%e02 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d2, <16 x i8> %k0)
%f02 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e02)
%e03 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d3, <16 x i8> %k0)
%f03 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e03)
%b1 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 1
%k1 = load <16 x i8>, <16 x i8>* %b1
%e10 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f00, <16 x i8> %k1)
%f10 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e00)
%e11 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f01, <16 x i8> %k1)
%f11 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e01)
%e12 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f02, <16 x i8> %k1)
%f12 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e02)
%e13 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f03, <16 x i8> %k1)
%f13 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e03)
%b2 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 2
%k2 = load <16 x i8>, <16 x i8>* %b2
%e20 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f10, <16 x i8> %k2)
%f20 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e10)
%e21 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f11, <16 x i8> %k2)
%f21 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e11)
%e22 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f12, <16 x i8> %k2)
%f22 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e12)
%e23 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f13, <16 x i8> %k2)
%f23 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e13)
%b3 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 3
%k3 = load <16 x i8>, <16 x i8>* %b3
%e30 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f20, <16 x i8> %k3)
%f30 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e20)
%e31 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f21, <16 x i8> %k3)
%f31 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e21)
%e32 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f22, <16 x i8> %k3)
%f32 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e22)
%e33 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f23, <16 x i8> %k3)
%f33 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e23)
%g0 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f30, <16 x i8> %d)
%h0 = xor <16 x i8> %g0, %e
%g1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f31, <16 x i8> %d)
%h1 = xor <16 x i8> %g1, %e
%g2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f32, <16 x i8> %d)
%h2 = xor <16 x i8> %g2, %e
%g3 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f33, <16 x i8> %d)
%h3 = xor <16 x i8> %g3, %e
store <16 x i8> %h0, <16 x i8>* %c0
%c1 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 1
store <16 x i8> %h1, <16 x i8>* %c1
%c2 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 2
store <16 x i8> %h2, <16 x i8>* %c2
%c3 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 3
store <16 x i8> %h3, <16 x i8>* %c3
ret void
; CHECK-LABEL: aesea:
; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
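; As with aes_load_store below, the first aese is not required to be immediately
; followed by its aesmc; another instruction may be scheduled in between. Each of
; the remaining pairs is expected to stay fused back to back.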
; CHECK: aesmc [[VA]], [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VB]], [[VB]]
; CHECK: aese [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VC]], [[VC]]
; CHECK: aese [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VD]], [[VD]]
; CHECK: aese [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VE]], [[VE]]
; CHECK: aese [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VF]], [[VF]]
; CHECK: aese [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VG]], [[VG]]
; CHECK: aese [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VH]], [[VH]]
; CHECK-NOT: aesmc
}

define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, <16 x i8> %e) {
%d0 = load <16 x i8>, <16 x i8>* %a0
%a1 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 1
%d1 = load <16 x i8>, <16 x i8>* %a1
%a2 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 2
%d2 = load <16 x i8>, <16 x i8>* %a2
%a3 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 3
%d3 = load <16 x i8>, <16 x i8>* %a3
%k0 = load <16 x i8>, <16 x i8>* %b0
%e00 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d0, <16 x i8> %k0)
%f00 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e00)
%e01 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d1, <16 x i8> %k0)
%f01 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e01)
%e02 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d2, <16 x i8> %k0)
%f02 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e02)
%e03 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d3, <16 x i8> %k0)
%f03 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e03)
%b1 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 1
%k1 = load <16 x i8>, <16 x i8>* %b1
%e10 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f00, <16 x i8> %k1)
%f10 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e00)
%e11 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f01, <16 x i8> %k1)
%f11 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e01)
%e12 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f02, <16 x i8> %k1)
%f12 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e02)
%e13 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f03, <16 x i8> %k1)
%f13 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e03)
%b2 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 2
%k2 = load <16 x i8>, <16 x i8>* %b2
%e20 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f10, <16 x i8> %k2)
%f20 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e10)
%e21 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f11, <16 x i8> %k2)
%f21 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e11)
%e22 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f12, <16 x i8> %k2)
%f22 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e12)
%e23 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f13, <16 x i8> %k2)
%f23 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e13)
%b3 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 3
%k3 = load <16 x i8>, <16 x i8>* %b3
%e30 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f20, <16 x i8> %k3)
%f30 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e20)
%e31 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f21, <16 x i8> %k3)
%f31 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e21)
%e32 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f22, <16 x i8> %k3)
%f32 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e22)
%e33 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f23, <16 x i8> %k3)
%f33 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e23)
%g0 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f30, <16 x i8> %d)
%h0 = xor <16 x i8> %g0, %e
%g1 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f31, <16 x i8> %d)
%h1 = xor <16 x i8> %g1, %e
%g2 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f32, <16 x i8> %d)
%h2 = xor <16 x i8> %g2, %e
%g3 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f33, <16 x i8> %d)
%h3 = xor <16 x i8> %g3, %e
store <16 x i8> %h0, <16 x i8>* %c0
%c1 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 1
store <16 x i8> %h1, <16 x i8>* %c1
%c2 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 2
store <16 x i8> %h2, <16 x i8>* %c2
%c3 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 3
store <16 x i8> %h3, <16 x i8>* %c3
ret void
; CHECK-LABEL: aesda:
; CHECK: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECK: aesimc [[VA]], [[VA]]
; CHECK: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VB]], [[VB]]
; CHECK: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VC]], [[VC]]
; CHECK: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VD]], [[VD]]
; CHECK: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VE]], [[VE]]
; CHECK: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VF]], [[VF]]
; CHECK: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VG]], [[VG]]
; CHECK: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VH]], [[VH]]
; CHECK-NOT: aesimc
}

define void @aes_load_store(<16 x i8> *%p1, <16 x i8> *%p2, <16 x i8> *%p3) {
entry:
%x1 = alloca <16 x i8>, align 16
%x2 = alloca <16 x i8>, align 16
%x3 = alloca <16 x i8>, align 16
%x4 = alloca <16 x i8>, align 16
%x5 = alloca <16 x i8>, align 16
%in1 = load <16 x i8>, <16 x i8>* %p1, align 16
store <16 x i8> %in1, <16 x i8>* %x1, align 16
%aese1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %in1, <16 x i8> %in1) #2
%in2 = load <16 x i8>, <16 x i8>* %p2, align 16
%aesmc1 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese1) #2
%aese2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %in1, <16 x i8> %in2) #2
store <16 x i8> %aesmc1, <16 x i8>* %x3, align 16
%in3 = load <16 x i8>, <16 x i8>* %p3, align 16
%aesmc2 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese2) #2
%aese3 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %aesmc2, <16 x i8> %in3) #2
store <16 x i8> %aese3, <16 x i8>* %x5, align 16
ret void
; CHECK-LABEL: aes_load_store:
; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; aese and aesmc are described as sharing a unit, hence they won't be scheduled on
; the same cycle and the scheduler can find another instruction to place in between
; CHECK: aesmc [[VA]], [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VB]], [[VB]]
; CHECK-NOT: aesmc
}