2009-09-09 01:54:48 +02:00
|
|
|
; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7
|
Implement x86 h-register extract support.
- Add patterns for h-register extract, which avoids a shift and mask,
and in some cases a temporary register.
- Add address-mode matching for turning (X>>(8-n))&(255<<n), where
n is a valid address-mode scale value, into an h-register extract
and a scaled-offset address.
- Replace X86's MOV32to32_ and related instructions with the new
target-independent COPY_TO_SUBREG instruction.
On x86-64 there are complicated constraints on h registers, and
CodeGen doesn't currently provide a high-level way to express all of them,
so they are handled with a bunch of special code. This code currently only
supports extracts where the result is used by a zero-extend or a store,
though these are fairly common.
These transformations are not always beneficial; since there are only
4 h registers, they sometimes require extra move instructions, and
this sometimes increases register pressure because it can force out
values that would otherwise be in one of those registers. However,
this appears to be relatively uncommon.
llvm-svn: 68962
2009-04-13 18:09:41 +02:00
|
|
|
|
|
|
|
; Use h-register extract and zero-extend.
|
|
|
|
|
|
|
|
; foo8: index a double array by byte 1 (bits 8..15) of %x.
; The lshr-by-8 followed by and-with-255 isolates the second-lowest byte of
; %x; that value is used as the element index for the getelementptr on a
; double*, and the addressed element is loaded and returned.
define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly {
|
|
|
|
; Shift byte 1 of %x down into the low 8 bits.
%t0 = lshr i64 %x, 8
|
|
|
|
; Mask off everything above the low byte, leaving an index in 0..255.
%t1 = and i64 %t0, 255
|
|
|
|
%t2 = getelementptr double* %p, i64 %t1
|
|
|
|
%t3 = load double* %t2, align 8
|
|
|
|
ret double %t3
|
|
|
|
}
|
|
|
|
; foo4: index a float array by byte 1 (bits 8..15) of %x.
; Same shift-and-mask pattern as the double variant, but the getelementptr
; operates on a float*, and the addressed float is loaded and returned.
define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly {
|
|
|
|
; Shift byte 1 of %x down into the low 8 bits.
%t0 = lshr i64 %x, 8
|
|
|
|
; Mask off everything above the low byte, leaving an index in 0..255.
%t1 = and i64 %t0, 255
|
|
|
|
%t2 = getelementptr float* %p, i64 %t1
|
|
|
|
; NOTE(review): the load claims align 8 although the element type is float;
; confirm this over-alignment is intentional for the test.
%t3 = load float* %t2, align 8
|
|
|
|
ret float %t3
|
|
|
|
}
|
|
|
|
; foo2: index an i16 array by byte 1 (bits 8..15) of %x.
; Same shift-and-mask pattern as the other foo* functions, with the
; getelementptr operating on an i16*; the addressed i16 is loaded and returned.
define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly {
|
|
|
|
; Shift byte 1 of %x down into the low 8 bits.
%t0 = lshr i64 %x, 8
|
|
|
|
; Mask off everything above the low byte, leaving an index in 0..255.
%t1 = and i64 %t0, 255
|
|
|
|
%t2 = getelementptr i16* %p, i64 %t1
|
|
|
|
; NOTE(review): the load claims align 8 although the element type is i16;
; confirm this over-alignment is intentional for the test.
%t3 = load i16* %t2, align 8
|
|
|
|
ret i16 %t3
|
|
|
|
}
|
|
|
|
; foo1: index an i8 array by byte 1 (bits 8..15) of %x.
; Same shift-and-mask pattern as the other foo* functions, with the
; getelementptr operating on an i8*, so the extracted byte is used directly as
; a byte offset from %p; the addressed i8 is loaded and returned.
define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
|
|
|
|
; Shift byte 1 of %x down into the low 8 bits.
%t0 = lshr i64 %x, 8
|
|
|
|
; Mask off everything above the low byte, leaving an index in 0..255.
%t1 = and i64 %t0, 255
|
|
|
|
%t2 = getelementptr i8* %p, i64 %t1
|
|
|
|
; NOTE(review): the load claims align 8 although the element type is i8;
; confirm this over-alignment is intentional for the test.
%t3 = load i8* %t2, align 8
|
|
|
|
ret i8 %t3
|
|
|
|
}
|
|
|
|
; bar8: byte offset computed as (x >> 5) & (255 << 3).
; 2040 is 255 << 3, so the masked value equals ((x >> 8) & 255) * 8: byte 1 of
; %x already multiplied by 8. This is the (X>>(8-n))&(255<<n) form with n = 3.
; The result is used as a byte offset from the i8* %p, and the addressed i8 is
; loaded and returned.
define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
|
|
|
|
; Shift by 8 - 3 so byte 1 of %x lands at bits 3..10.
%t0 = lshr i64 %x, 5
|
|
|
|
; Keep bits 3..10 only (2040 == 255 << 3).
%t1 = and i64 %t0, 2040
|
|
|
|
%t2 = getelementptr i8* %p, i64 %t1
|
|
|
|
%t3 = load i8* %t2, align 8
|
|
|
|
ret i8 %t3
|
|
|
|
}
|
|
|
|
; bar4: byte offset computed as (x >> 6) & (255 << 2).
; 1020 is 255 << 2, so the masked value equals ((x >> 8) & 255) * 4: byte 1 of
; %x already multiplied by 4. This is the (X>>(8-n))&(255<<n) form with n = 2.
; The result is used as a byte offset from the i8* %p, and the addressed i8 is
; loaded and returned.
define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
|
|
|
|
; Shift by 8 - 2 so byte 1 of %x lands at bits 2..9.
%t0 = lshr i64 %x, 6
|
|
|
|
; Keep bits 2..9 only (1020 == 255 << 2).
%t1 = and i64 %t0, 1020
|
|
|
|
%t2 = getelementptr i8* %p, i64 %t1
|
|
|
|
%t3 = load i8* %t2, align 8
|
|
|
|
ret i8 %t3
|
|
|
|
}
|
|
|
|
; bar2: byte offset computed as (x >> 7) & (255 << 1).
; 510 is 255 << 1, so the masked value equals ((x >> 8) & 255) * 2: byte 1 of
; %x already multiplied by 2. This is the (X>>(8-n))&(255<<n) form with n = 1.
; The result is used as a byte offset from the i8* %p, and the addressed i8 is
; loaded and returned.
define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
|
|
|
|
; Shift by 8 - 1 so byte 1 of %x lands at bits 1..8.
%t0 = lshr i64 %x, 7
|
|
|
|
; Keep bits 1..8 only (510 == 255 << 1).
%t1 = and i64 %t0, 510
|
|
|
|
%t2 = getelementptr i8* %p, i64 %t1
|
|
|
|
%t3 = load i8* %t2, align 8
|
|
|
|
ret i8 %t3
|
|
|
|
}
|