2011-07-07 05:55:05 +02:00
|
|
|
; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s
|
2010-04-23 01:24:18 +02:00
|
|
|
|
|
|
|
; sbfx1: signed extraction of the 11-bit field that starts at bit 7 of %a.
; The logical shift right by 7 moves the field down to bit 0, the trunc to i11
; discards everything above it, and the sext widens it back to i32 with sign
; extension. The expected ARM code is a single SBFX with lsb=7, width=11.
define i32 @sbfx1(i32 %a) {
; CHECK: sbfx1
; CHECK: sbfx r0, r0, #7, #11
  %shifted = lshr i32 %a, 7
  %field = trunc i32 %shifted to i11
  %result = sext i11 %field to i32
  ret i32 %result
}
|
|
|
|
|
|
|
|
; ubfx1: unsigned extraction of the 11-bit field that starts at bit 7 of %a.
; Same shift-and-truncate shape as the signed variant, but the i11 value is
; zero-extended back to i32. The expected ARM code is a single UBFX with
; lsb=7, width=11.
define i32 @ubfx1(i32 %a) {
; CHECK: ubfx1
; CHECK: ubfx r0, r0, #7, #11
  %shifted = lshr i32 %a, 7
  %field = trunc i32 %shifted to i11
  %result = zext i11 %field to i32
  ret i32 %result
}
|
|
|
|
|
|
|
|
; ubfx2: the same unsigned 11-bit extraction at bit 7, written as shift + mask
; instead of trunc + zext. 2047 is 0x7ff, i.e. the low 11 bits, so the result
; equals the zero-extended field and the same UBFX (lsb=7, width=11) is
; expected.
define i32 @ubfx2(i32 %a) {
; CHECK: ubfx2
; CHECK: ubfx r0, r0, #7, #11
  %shifted = lshr i32 %a, 7
  %field = and i32 %shifted, 2047
  ret i32 %field
}
|
|
|
|
|
LLVM sdisel normalizes bit extraction of the form:
((x & 0xff00) >> 8) << 2
to
(x >> 6) & 0x3fc
This is general goodness since it folds a left shift into the mask. However,
the trailing zeros in the mask prevent the ARM backend from using the bit
extraction instructions. Worse, materializing the mask may require an
additional instruction. This comes up fairly frequently when the result of
the bit twiddling is used as a memory address, e.g.
= ptr[(x & 0xFF0000) >> 16]
We want to generate:
ubfx r3, r1, #16, #8
ldr.w r3, [r0, r3, lsl #2]
vs.
mov.w r9, #1020
and.w r2, r9, r1, lsr #14
ldr r2, [r0, r2]
Add a late ARM-specific isel optimization to
ARMDAGToDAGISel::PreprocessISelDAG(). It folds the left shift into the
'base + offset' address computation, changes the mask to one which doesn't
have trailing zeros, and enables the use of ubfx.
Note the optimization has to be done late since it's target-specific and we
don't want to change the DAG normalization. It's also fairly restrictive,
as shifter operands are not always free: it's only done for left shifts by
1 or 2, which are known to be free on some CPUs and are the most common
shift amounts in address computation.
This is a slight win for blowfish, rijndael, etc.
rdar://12870177
llvm-svn: 170581
2012-12-19 21:16:09 +01:00
|
|
|
; rdar://12870177
; ubfx_opt: returns ctx[byte3(x)] + ctx[byte2(x)] + ctx[byte1(x)], where byteN
; is the 8-bit field of %x starting at bit 8*N.
;
; The directives below expect:
;   - the top byte index to come from a plain LSR by 24 (no mask is needed),
;   - the two middle byte indices to come from UBFX (#16,#8 and #8,#8),
;   - every load to scale its index in the addressing mode (lsl #2) rather
;     than through a pre-shifted mask.
; The register patterns accept lr or any rN, so the test does not depend on
; register allocation.
define i32 @ubfx_opt(i32* nocapture %ctx, i32 %x) nounwind readonly ssp {
entry:
; CHECK: ubfx_opt
; CHECK: lsr [[REG1:(lr|r[0-9]+)]], r1, #24
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG1]], lsl #2]
; CHECK: ubfx [[REG2:(lr|r[0-9]+)]], r1, #16, #8
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG2]], lsl #2]
; CHECK: ubfx [[REG3:(lr|r[0-9]+)]], r1, #8, #8
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG3]], lsl #2]
  ; Index computations: bits [15:8], bits [23:16], bits [31:24].
  %x.shr8 = lshr i32 %x, 8
  %idx1 = and i32 %x.shr8, 255
  %x.shr16 = lshr i32 %x, 16
  %idx2 = and i32 %x.shr16, 255
  %idx3 = lshr i32 %x, 24
  ; Table lookups and running sum, highest byte first.
  %slot3 = getelementptr inbounds i32* %ctx, i32 %idx3
  %val3 = load i32* %slot3, align 4
  %slot2 = getelementptr inbounds i32* %ctx, i32 %idx2
  %val2 = load i32* %slot2, align 4
  %sum32 = add i32 %val2, %val3
  %slot1 = getelementptr inbounds i32* %ctx, i32 %idx1
  %val1 = load i32* %slot1, align 4
  %sum = add i32 %sum32, %val1
  ret i32 %sum
}
|