mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[PowerPC] Improve instruction selection for bit-permuting operations (64-bit)
This is the second installment of improvements to instruction selection for "bit permutation" instruction sequences. r224318 added logic for instruction selection for 32-bit bit permutation sequences, and this adds lowering for 64-bit sequences.

The 64-bit sequences are more complicated than the 32-bit ones because:

a) the 64-bit versions of the 32-bit rotate-and-mask instructions work by replicating the lower 32 bits of the value-to-be-rotated into the upper 32 bits -- and integrating this into the cost modeling for the various bit group operations is non-trivial

b) unlike the 32-bit instructions in 32-bit mode, the 64-bit rotate-and-mask instructions cannot, in one instruction, specify the mask starting index, the mask ending index, and the rotation factor.

Also, forming arbitrary 64-bit constants is more complicated than in 32-bit mode because the number of instructions necessary is value dependent.

Plus, support for 'late masking' was added: it is sometimes more efficient to treat the overall value as if it had no mandatory zero bits when planning the bit-group insertions, and then mask them in at the very end. Unfortunately, as the structure of the bit groups is different in the two cases, the more feasible implementation technique was to generate both instruction sequences, and then pick the shorter one.

And finally, we now generate reasonable code for i64 bswap:

    rldicl 5, 3, 16, 0
    rldicl 4, 3, 8, 0
    rldicl 6, 3, 24, 0
    rldimi 4, 5, 8, 48
    rldicl 5, 3, 32, 0
    rldimi 4, 6, 16, 40
    rldicl 6, 3, 48, 0
    rldimi 4, 5, 24, 32
    rldicl 5, 3, 56, 0
    rldimi 4, 6, 40, 16
    rldimi 4, 5, 48, 8
    rldimi 4, 3, 56, 0

vs. what we used to produce:

    li 4, 255
    rldicl 5, 3, 24, 40
    rldicl 6, 3, 40, 24
    rldicl 7, 3, 56, 8
    sldi 8, 3, 8
    sldi 10, 3, 24
    sldi 12, 3, 40
    rldicl 0, 3, 8, 56
    sldi 9, 4, 32
    sldi 11, 4, 40
    sldi 4, 4, 48
    andi. 5, 5, 65280
    andis. 6, 6, 255
    andis. 7, 7, 65280
    sldi 3, 3, 56
    and 8, 8, 9
    and 4, 12, 4
    and 9, 10, 11
    or 6, 7, 6
    or 5, 5, 0
    or 3, 3, 4
    or 7, 9, 8
    or 4, 6, 5
    or 3, 3, 7
    or 3, 3, 4

which is 12 instructions, instead of 25, and seems optimal (at least in terms of code size).

llvm-svn: 225056
This commit is contained in:
parent
2c8ecfdec0
commit
93997c9aa6
File diff suppressed because it is too large
Load Diff
@ -617,13 +617,11 @@ def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
|
||||
}
|
||||
|
||||
let hasSideEffects = 0 in {
|
||||
let isCommutable = 1 in {
|
||||
defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
|
||||
(ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE),
|
||||
"rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
|
||||
[]>, isPPC64, RegConstraint<"$rSi = $rA">,
|
||||
NoEncode<"$rSi">;
|
||||
}
|
||||
|
||||
// Rotate instructions.
|
||||
defm RLDCL : MDSForm_1r<30, 8,
|
||||
|
@ -16,6 +16,100 @@ entry:
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @bs8(i64 %x) #0 {
|
||||
entry:
|
||||
%0 = tail call i64 @llvm.bswap.i64(i64 %x)
|
||||
ret i64 %0
|
||||
|
||||
; CHECK-LABEL: @bs8
|
||||
; CHECK-DAG: rldicl [[REG1:[0-9]+]], 3, 16, 0
|
||||
; CHECK-DAG: rldicl [[REG2:[0-9]+]], 3, 8, 0
|
||||
; CHECK-DAG: rldicl [[REG3:[0-9]+]], 3, 24, 0
|
||||
; CHECK-DAG: rldimi [[REG2]], [[REG1]], 8, 48
|
||||
; CHECK-DAG: rldicl [[REG4:[0-9]+]], 3, 32, 0
|
||||
; CHECK-DAG: rldimi [[REG2]], [[REG3]], 16, 40
|
||||
; CHECK-DAG: rldicl [[REG5:[0-9]+]], 3, 48, 0
|
||||
; CHECK-DAG: rldimi [[REG2]], [[REG4]], 24, 32
|
||||
; CHECK-DAG: rldicl [[REG6:[0-9]+]], 3, 56, 0
|
||||
; CHECK-DAG: rldimi [[REG2]], [[REG5]], 40, 16
|
||||
; CHECK-DAG: rldimi [[REG2]], [[REG6]], 48, 8
|
||||
; CHECK-DAG: rldimi [[REG2]], 3, 56, 0
|
||||
; CHECK: mr 3, [[REG2]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test1(i64 %i0, i64 %i1) #0 {
|
||||
entry:
|
||||
%0 = lshr i64 %i1, 8
|
||||
%and = and i64 %0, 5963776000
|
||||
ret i64 %and
|
||||
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 11375
|
||||
; CHECK-DAG: rldicl [[REG3:[0-9]+]], 4, 56, 0
|
||||
; CHECK-DAG: sldi [[REG2:[0-9]+]], [[REG1]], 19
|
||||
; CHECK: and 3, [[REG3]], [[REG2]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test2(i64 %i0, i64 %i1) #0 {
|
||||
entry:
|
||||
%0 = lshr i64 %i1, 6
|
||||
%and = and i64 %0, 133434808670355456
|
||||
ret i64 %and
|
||||
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK-DAG: lis [[REG1:[0-9]+]], 474
|
||||
; CHECK-DAG: rldicl [[REG5:[0-9]+]], 4, 58, 0
|
||||
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 3648
|
||||
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 32
|
||||
; CHECK-DAG: oris [[REG4:[0-9]+]], [[REG3]], 25464
|
||||
; CHECK: and 3, [[REG5]], [[REG4]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test3(i64 %i0, i64 %i1) #0 {
|
||||
entry:
|
||||
%0 = shl i64 %i0, 34
|
||||
%and = and i64 %0, 191795733152661504
|
||||
ret i64 %and
|
||||
|
||||
; CHECK-LABEL: @test3
|
||||
; CHECK-DAG: lis [[REG1:[0-9]+]], 170
|
||||
; CHECK-DAG: rldicl [[REG4:[0-9]+]], 3, 34, 0
|
||||
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 22861
|
||||
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 34
|
||||
; CHECK: and 3, [[REG4]], [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test4(i64 %i0, i64 %i1) #0 {
|
||||
entry:
|
||||
%0 = lshr i64 %i1, 15
|
||||
%and = and i64 %0, 58195968
|
||||
ret i64 %and
|
||||
|
||||
; CHECK-LABEL: @test4
|
||||
; CHECK: rldicl [[REG1:[0-9]+]], 4, 49, 0
|
||||
; CHECK: andis. 3, [[REG1]], 888
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test5(i64 %i0, i64 %i1) #0 {
|
||||
entry:
|
||||
%0 = shl i64 %i1, 12
|
||||
%and = and i64 %0, 127252959854592
|
||||
ret i64 %and
|
||||
|
||||
; CHECK-LABEL: @test5
|
||||
; CHECK-DAG: lis [[REG1:[0-9]+]], 3703
|
||||
; CHECK-DAG: rldicl [[REG4:[0-9]+]], 4, 12, 0
|
||||
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 35951
|
||||
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 19
|
||||
; CHECK: and 3, [[REG4]], [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define zeroext i32 @test6(i32 zeroext %x) #0 {
|
||||
entry:
|
||||
@ -33,8 +127,153 @@ entry:
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test7(i64 %i0, i64 %i1) #0 {
|
||||
entry:
|
||||
%0 = lshr i64 %i0, 5
|
||||
%and = and i64 %0, 58195968
|
||||
ret i64 %and
|
||||
|
||||
; CHECK-LABEL: @test7
|
||||
; CHECK: rlwinm [[REG1:[0-9]+]], 3, 27, 9, 12
|
||||
; CHECK: rlwimi [[REG1]], 3, 27, 6, 7
|
||||
; CHECK: mr 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test8(i64 %i0, i64 %i1) #0 {
|
||||
entry:
|
||||
%0 = lshr i64 %i0, 1
|
||||
%and = and i64 %0, 169172533248
|
||||
ret i64 %and
|
||||
|
||||
; CHECK-LABEL: @test8
|
||||
; CHECK-DAG: lis [[REG1:[0-9]+]], 4
|
||||
; CHECK-DAG: rldicl [[REG4:[0-9]+]], 3, 63, 0
|
||||
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 60527
|
||||
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 19
|
||||
; CHECK: and 3, [[REG4]], [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test9(i64 %i0, i64 %i1) #0 {
|
||||
entry:
|
||||
%0 = lshr i64 %i1, 14
|
||||
%and = and i64 %0, 18848677888
|
||||
%1 = shl i64 %i1, 51
|
||||
%and3 = and i64 %1, 405323966463344640
|
||||
%or4 = or i64 %and, %and3
|
||||
ret i64 %or4
|
||||
|
||||
; CHECK-LABEL: @test9
|
||||
; CHECK-DAG: lis [[REG1:[0-9]+]], 1440
|
||||
; CHECK-DAG: rldicl [[REG5:[0-9]+]], 4, 62, 0
|
||||
; CHECK-DAG: rldicl [[REG6:[0-9]+]], 4, 50, 0
|
||||
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 4
|
||||
; CHECK-DAG: rldimi [[REG6]], [[REG5]], 53, 0
|
||||
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 32
|
||||
; CHECK-DAG: oris [[REG4:[0-9]+]], [[REG3]], 25464
|
||||
; CHECK: and 3, [[REG6]], [[REG4]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test10(i64 %i0, i64 %i1) #0 {
|
||||
entry:
|
||||
%0 = shl i64 %i0, 37
|
||||
%and = and i64 %0, 15881483390550016
|
||||
%1 = shl i64 %i0, 25
|
||||
%and3 = and i64 %1, 2473599172608
|
||||
%or4 = or i64 %and, %and3
|
||||
ret i64 %or4
|
||||
|
||||
; CHECK-LABEL: @test10
|
||||
; CHECK-DAG: lis [[REG1:[0-9]+]], 1
|
||||
; CHECK-DAG: rldicl [[REG6:[0-9]+]], 3, 25, 0
|
||||
; CHECK-DAG: rldicl [[REG7:[0-9]+]], 3, 37, 0
|
||||
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 8183
|
||||
; CHECK-DAG: ori [[REG3:[0-9]+]], [[REG1]], 50017
|
||||
; CHECK-DAG: sldi [[REG4:[0-9]+]], [[REG2]], 25
|
||||
; CHECK-DAG: sldi [[REG5:[0-9]+]], [[REG3]], 37
|
||||
; CHECK-DAG: and [[REG8:[0-9]+]], [[REG6]], [[REG4]]
|
||||
; CHECK-DAG: and [[REG9:[0-9]+]], [[REG7]], [[REG5]]
|
||||
; CHECK: or 3, [[REG9]], [[REG8]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test11(i64 %x) #0 {
|
||||
entry:
|
||||
%and = and i64 %x, 4294967295
|
||||
%shl = shl i64 %x, 32
|
||||
%or = or i64 %and, %shl
|
||||
ret i64 %or
|
||||
|
||||
; CHECK-LABEL: @test11
|
||||
; CHECK: rlwinm 3, 3, 0, 1, 0
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test12(i64 %x) #0 {
|
||||
entry:
|
||||
%and = and i64 %x, 4294905855
|
||||
%shl = shl i64 %x, 32
|
||||
%or = or i64 %and, %shl
|
||||
ret i64 %or
|
||||
|
||||
; CHECK-LABEL: @test12
|
||||
; CHECK: rlwinm 3, 3, 0, 20, 15
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test13(i64 %x) #0 {
|
||||
entry:
|
||||
%shl = shl i64 %x, 4
|
||||
%and = and i64 %shl, 240
|
||||
%shr = lshr i64 %x, 28
|
||||
%and1 = and i64 %shr, 15
|
||||
%or = or i64 %and, %and1
|
||||
ret i64 %or
|
||||
|
||||
; CHECK-LABEL: @test13
|
||||
; CHECK: rlwinm 3, 3, 4, 24, 31
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test14(i64 %x) #0 {
|
||||
entry:
|
||||
%shl = shl i64 %x, 4
|
||||
%and = and i64 %shl, 240
|
||||
%shr = lshr i64 %x, 28
|
||||
%and1 = and i64 %shr, 15
|
||||
%and2 = and i64 %x, -4294967296
|
||||
%or = or i64 %and1, %and2
|
||||
%or3 = or i64 %or, %and
|
||||
ret i64 %or3
|
||||
|
||||
; CHECK-LABEL: @test14
|
||||
; CHECK: rldicr [[REG1:[0-9]+]], 3, 0, 31
|
||||
; CHECK: rlwimi [[REG1]], 3, 4, 24, 31
|
||||
; CHECK: mr 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i64 @test15(i64 %x) #0 {
|
||||
entry:
|
||||
%shl = shl i64 %x, 4
|
||||
%and = and i64 %shl, 240
|
||||
%shr = lshr i64 %x, 28
|
||||
%and1 = and i64 %shr, 15
|
||||
%and2 = and i64 %x, -256
|
||||
%or = or i64 %and1, %and2
|
||||
%or3 = or i64 %or, %and
|
||||
ret i64 %or3
|
||||
|
||||
; CHECK-LABEL: @test15
|
||||
; CHECK: rlwimi 3, 3, 4, 24, 31
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.bswap.i32(i32) #0
|
||||
declare i64 @llvm.bswap.i64(i64) #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user