llvm-mirror/test/CodeGen/X86/h-register-addressing-32.ll

; RUN: llc < %s -march=x86 | grep "movzbl	%[abcd]h," | count 7

; Use h-register extract and zero-extend.

; foo8: load the double at %p[(%x >> 8) & 255].
; The index is bits 8..15 of %x, the byte that the h-register extract this
; test looks for can supply; the GEP scales it by the 8-byte element size.
define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly {
  %shifted = lshr i32 %x, 8
  %idx = and i32 %shifted, 255
  %addr = getelementptr double* %p, i32 %idx
  %val = load double* %addr, align 8
  ret double %val
}
; foo4: load the float at %p[(%x >> 8) & 255].
; The index is bits 8..15 of %x; the GEP scales it by the 4-byte element size.
define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly {
  %t0 = lshr i32 %x, 8
  %t1 = and i32 %t0, 255
  %t2 = getelementptr float* %p, i32 %t1
  ; Consecutive elements are 4 bytes apart, so 4 is the largest alignment that
  ; can hold for every index; an 8-byte claim would be an overestimate (and
  ; therefore undefined behavior) for odd indices.
  ; NOTE(review): this still assumes %p itself is at least 4-byte aligned.
  %t3 = load float* %t2, align 4
  ret float %t3
}
; foo2: load the i16 at %p[(%x >> 8) & 255].
; The index is bits 8..15 of %x; the GEP scales it by the 2-byte element size.
define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly {
  %t0 = lshr i32 %x, 8
  %t1 = and i32 %t0, 255
  %t2 = getelementptr i16* %p, i32 %t1
  ; Consecutive elements are 2 bytes apart, so 2 is the largest alignment that
  ; can hold for every index; an 8-byte claim would be an overestimate (and
  ; therefore undefined behavior) for most indices.
  ; NOTE(review): this still assumes %p itself is at least 2-byte aligned.
  %t3 = load i16* %t2, align 2
  ret i16 %t3
}
; foo1: load the byte at %p[(%x >> 8) & 255].
; The index is bits 8..15 of %x and is used unscaled (1-byte elements).
define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
  %t0 = lshr i32 %x, 8
  %t1 = and i32 %t0, 255
  %t2 = getelementptr i8* %p, i32 %t1
  ; The byte offset can be any value in 0..255, so no alignment above 1 can
  ; hold for every index; an 8-byte claim would be an overestimate (and
  ; therefore undefined behavior) for most indices.
  %t3 = load i8* %t2, align 1
  ret i8 %t3
}
; bar8: load the byte at %p + ((%x >> 5) & 2040).
; 2040 == 255 << 3, so the byte offset equals ((%x >> 8) & 255) * 8: the
; pre-scaled form of a bits-8..15 index with a scale of 8.
define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
  %shifted = lshr i32 %x, 5
  %offset = and i32 %shifted, 2040
  %addr = getelementptr i8* %p, i32 %offset
  %byte = load i8* %addr, align 8
  ret i8 %byte
}
; bar4: load the byte at %p + ((%x >> 6) & 1020).
; 1020 == 255 << 2, so the byte offset equals ((%x >> 8) & 255) * 4: the
; pre-scaled form of a bits-8..15 index with a scale of 4.
define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
  %t0 = lshr i32 %x, 6
  %t1 = and i32 %t0, 1020
  %t2 = getelementptr i8* %p, i32 %t1
  ; The offset is only guaranteed to be a multiple of 4, so 4 is the largest
  ; alignment that can hold for every %x; an 8-byte claim would be an
  ; overestimate (and therefore undefined behavior) for half of the offsets.
  ; NOTE(review): this still assumes %p itself is at least 4-byte aligned.
  %t3 = load i8* %t2, align 4
  ret i8 %t3
}
; bar2: load the byte at %p + ((%x >> 7) & 510).
; 510 == 255 << 1, so the byte offset equals ((%x >> 8) & 255) * 2: the
; pre-scaled form of a bits-8..15 index with a scale of 2.
define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
  %t0 = lshr i32 %x, 7
  %t1 = and i32 %t0, 510
  %t2 = getelementptr i8* %p, i32 %t1
  ; The offset is only guaranteed to be a multiple of 2, so 2 is the largest
  ; alignment that can hold for every %x; an 8-byte claim would be an
  ; overestimate (and therefore undefined behavior) for most offsets.
  ; NOTE(review): this still assumes %p itself is at least 2-byte aligned.
  %t3 = load i8* %t2, align 2
  ret i8 %t3
}
Convert all tests using TCL-style quoting to use shell-style quoting. This was done with the aid of a terrible Perl creation. I will not paste any of the horrors here. Suffice to say, it required multiple staged rounds of replacements, state carried between, and a few nested-construct-parsing hacks that I'm not proud of. It happens, by luck, to be able to deal with all the TCL-quoting patterns in evidence in the LLVM test suite. If anyone is maintaining large out-of-tree test trees, feel free to poke me and I'll send you the steps I used to convert things, as well as answer any painful questions etc. IRC works best for this type of thing, I find. Once converted, switch the LLVM lit config to use ShTests the same as Clang. In addition to being able to delete large amounts of Python code from 'lit', this will also simplify the entire test suite and some of lit's architecture. Finally, the test suite runs 33% faster on Linux now. ;] For my 16-hardware-thread (2x 4-core xeon e5520): 36s -> 24s llvm-svn: 159525 2012-07-02 14:47:22 +02:00			`; RUN: llc < %s -march=x86 \| grep "movzbl %[abcd]h," \| count 7`
Implement x86 h-register extract support. - Add patterns for h-register extract, which avoids a shift and mask, and in some cases a temporary register. - Add address-mode matching for turning (X>>(8-n))&(255<<n), where n is a valid address-mode scale value, into an h-register extract and a scaled-offset address. - Replace X86's MOV32to32_ and related instructions with the new target-independent COPY_TO_SUBREG instruction. On x86-64 there are complicated constraints on h registers, and CodeGen doesn't currently provide a high-level way to express all of them, so they are handled with a bunch of special code. This code currently only supports extracts where the result is used by a zero-extend or a store, though these are fairly common. These transformations are not always beneficial; since there are only 4 h registers, they sometimes require extra move instructions, and this sometimes increases register pressure because it can force out values that would otherwise be in one of those registers. However, this appears to be relatively uncommon. llvm-svn: 68962 2009-04-13 18:09:41 +02:00
			`; Use h-register extract and zero-extend.`

			`define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly {`
			`%t0 = lshr i32 %x, 8`
			`%t1 = and i32 %t0, 255`
			`%t2 = getelementptr double* %p, i32 %t1`
			`%t3 = load double* %t2, align 8`
			`ret double %t3`
			`}`
			`define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly {`
			`%t0 = lshr i32 %x, 8`
			`%t1 = and i32 %t0, 255`
			`%t2 = getelementptr float* %p, i32 %t1`
			`%t3 = load float* %t2, align 8`
			`ret float %t3`
			`}`
			`define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly {`
			`%t0 = lshr i32 %x, 8`
			`%t1 = and i32 %t0, 255`
			`%t2 = getelementptr i16* %p, i32 %t1`
			`%t3 = load i16* %t2, align 8`
			`ret i16 %t3`
			`}`
			`define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {`
			`%t0 = lshr i32 %x, 8`
			`%t1 = and i32 %t0, 255`
			`%t2 = getelementptr i8* %p, i32 %t1`
			`%t3 = load i8* %t2, align 8`
			`ret i8 %t3`
			`}`
			`define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {`
			`%t0 = lshr i32 %x, 5`
			`%t1 = and i32 %t0, 2040`
			`%t2 = getelementptr i8* %p, i32 %t1`
			`%t3 = load i8* %t2, align 8`
			`ret i8 %t3`
			`}`
			`define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {`
			`%t0 = lshr i32 %x, 6`
			`%t1 = and i32 %t0, 1020`
			`%t2 = getelementptr i8* %p, i32 %t1`
			`%t3 = load i8* %t2, align 8`
			`ret i8 %t3`
			`}`
			`define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {`
			`%t0 = lshr i32 %x, 7`
			`%t1 = and i32 %t0, 510`
			`%t2 = getelementptr i8* %p, i32 %t1`
			`%t3 = load i8* %t2, align 8`
			`ret i8 %t3`
			`}`