llvm-mirror/test/CodeGen/ARM/bfx.ll

; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s

; Signed bitfield extract: shift %a right by 7, keep the low 11 bits via a
; truncate, then sign-extend that field back to 32 bits. The directives below
; expect the whole sequence to be selected as one sbfx on r0.
define i32 @sbfx1(i32 %a) {
; CHECK: sbfx1
; CHECK: sbfx r0, r0, #7, #11
	; Move the field's least significant bit (bit 7) down to bit 0.
	%shifted = lshr i32 %a, 7
	; Narrow to the 11-bit field width.
	%field = trunc i32 %shifted to i11
	; Widen again, replicating the field's top bit.
	%widened = sext i11 %field to i32
	ret i32 %widened
}

; Unsigned bitfield extract written as shift / truncate / zero-extend: the
; 11-bit field of %a that starts at bit 7 is returned zero-extended. The
; directives below expect this to be selected as one ubfx on r0.
define i32 @ubfx1(i32 %a) {
; CHECK: ubfx1
; CHECK: ubfx r0, r0, #7, #11
	; Move the field's least significant bit (bit 7) down to bit 0.
	%shifted = lshr i32 %a, 7
	; Narrow to the 11-bit field width.
	%field = trunc i32 %shifted to i11
	; Widen again with zero fill.
	%widened = zext i11 %field to i32
	ret i32 %widened
}

; Unsigned bitfield extract written as shift / mask: 2047 is 0x7FF, i.e. the
; low 11 bits, so this selects the same field as the truncate + zext form.
; The directives below expect it to be selected as one ubfx on r0.
define i32 @ubfx2(i32 %a) {
; CHECK: ubfx2
; CHECK: ubfx r0, r0, #7, #11
	; Move the field's least significant bit (bit 7) down to bit 0.
	%shifted = lshr i32 %a, 7
	; Keep only the low 11 bits.
	%masked = and i32 %shifted, 2047
	ret i32 %masked
}

; rdar://12870177
; Sums three i32 table entries of %ctx, indexed by bits 31-24, 23-16 and 15-8
; of %x. The directives below expect each index to be produced by a plain lsr
; (top byte, no mask needed) or a ubfx (middle bytes), with the *4 element
; scaling folded into the load as "lsl #2" rather than baked into a mask.
define i32 @ubfx_opt(i32* nocapture %ctx, i32 %x) nounwind readonly ssp {
entry:
; CHECK: ubfx_opt
; CHECK: lsr [[REG1:(lr|r[0-9]+)]], r1, #24
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG1]], lsl #2]
; CHECK: ubfx [[REG2:(lr|r[0-9]+)]], r1, #16, #8
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG2]], lsl #2]
; CHECK: ubfx [[REG3:(lr|r[0-9]+)]], r1, #8, #8
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG3]], lsl #2]
  ; Index from bits 15-8 of %x.
  %x.shr8 = lshr i32 %x, 8
  %idx.b1 = and i32 %x.shr8, 255
  ; Index from bits 23-16 of %x.
  %x.shr16 = lshr i32 %x, 16
  %idx.b2 = and i32 %x.shr16, 255
  ; Index from bits 31-24 of %x; the shift alone isolates the byte.
  %idx.b3 = lshr i32 %x, 24
  %slot.b3 = getelementptr inbounds i32* %ctx, i32 %idx.b3
  %val.b3 = load i32* %slot.b3, align 4
  %slot.b2 = getelementptr inbounds i32* %ctx, i32 %idx.b2
  %val.b2 = load i32* %slot.b2, align 4
  %sum.hi = add i32 %val.b2, %val.b3
  %slot.b1 = getelementptr inbounds i32* %ctx, i32 %idx.b1
  %val.b1 = load i32* %slot.b1, align 4
  %sum.all = add i32 %sum.hi, %val.b1
  ret i32 %sum.all
}
Change some ARM subtarget features to be single bit yes/no in order to sink them down to MC layer. Also fix tests. llvm-svn: 134590 2011-07-07 05:55:05 +02:00			`; RUN: llc < %s -march=arm -mattr=+v7 \| FileCheck %s`
Update ARM DAGtoDAG for matching UBFX instruction for unsigned bitfield extraction. This fixes PR5998. llvm-svn: 102144 2010-04-23 01:24:18 +02:00
			`define i32 @sbfx1(i32 %a) {`
			`; CHECK: sbfx1`
			`; CHECK: sbfx r0, r0, #7, #11`
			`%t1 = lshr i32 %a, 7`
			`%t2 = trunc i32 %t1 to i11`
			`%t3 = sext i11 %t2 to i32`
			`ret i32 %t3`
			`}`

			`define i32 @ubfx1(i32 %a) {`
			`; CHECK: ubfx1`
			`; CHECK: ubfx r0, r0, #7, #11`
			`%t1 = lshr i32 %a, 7`
			`%t2 = trunc i32 %t1 to i11`
			`%t3 = zext i11 %t2 to i32`
			`ret i32 %t3`
			`}`

			`define i32 @ubfx2(i32 %a) {`
			`; CHECK: ubfx2`
			`; CHECK: ubfx r0, r0, #7, #11`
			`%t1 = lshr i32 %a, 7`
			`%t2 = and i32 %t1, 2047`
			`ret i32 %t2`
			`}`

LLVM sdisel normalizes bit extraction of the form ((x & 0xff00) >> 8) << 2 to (x >> 6) & 0x3fc. This is general goodness since it folds a left shift into the mask. However, the trailing zeros in the mask prevent the ARM backend from using the bit extraction instructions. Worse, the mask materialization may require an additional instruction. This comes up fairly frequently when the result of the bit twiddling is used as a memory address, e.g. = ptr[(x & 0xFF0000) >> 16]. We want to generate: ubfx r3, r1, #16, #8 ldr.w r3, [r0, r3, lsl #2] vs. mov.w r9, #1020 and.w r2, r9, r1, lsr #14 ldr r2, [r0, r2]. Add a late ARM-specific isel optimization to ARMDAGToDAGISel::PreprocessISelDAG(). It folds the left shift into the 'base + offset' address computation, changes the mask to one which doesn't have trailing zeros, and enables the use of ubfx. Note the optimization has to be done late since it's target-specific and we don't want to change the DAG normalization. It's also fairly restrictive, as shifter operands are not always free. It's only done for lsh 1 / 2. It's known to be free on some CPUs and they are the most common for address computation. This is a slight win for blowfish, rijndael, etc. rdar://12870177 llvm-svn: 170581 2012-12-19 21:16:09 +01:00			`; rdar://12870177`
			`define i32 @ubfx_opt(i32* nocapture %ctx, i32 %x) nounwind readonly ssp {`
			`entry:`
			`; CHECK: ubfx_opt`
			`; CHECK: lsr [[REG1:(lr\|r[0-9]+)]], r1, #24`
			`; CHECK: ldr {{lr\|r[0-9]+}}, [r0, [[REG1]], lsl #2]`
			`; CHECK: ubfx [[REG2:(lr\|r[0-9]+)]], r1, #16, #8`
			`; CHECK: ldr {{lr\|r[0-9]+}}, [r0, [[REG2]], lsl #2]`
			`; CHECK: ubfx [[REG3:(lr\|r[0-9]+)]], r1, #8, #8`
			`; CHECK: ldr {{lr\|r[0-9]+}}, [r0, [[REG3]], lsl #2]`
			`%and = lshr i32 %x, 8`
			`%shr = and i32 %and, 255`
			`%and1 = lshr i32 %x, 16`
			`%shr2 = and i32 %and1, 255`
			`%shr4 = lshr i32 %x, 24`
			`%arrayidx = getelementptr inbounds i32* %ctx, i32 %shr4`
			`%0 = load i32* %arrayidx, align 4`
			`%arrayidx5 = getelementptr inbounds i32* %ctx, i32 %shr2`
			`%1 = load i32* %arrayidx5, align 4`
			`%add = add i32 %1, %0`
			`%arrayidx6 = getelementptr inbounds i32* %ctx, i32 %shr`
			`%2 = load i32* %arrayidx6, align 4`
			`%add7 = add i32 %add, %2`
			`ret i32 %add7`
			`}`