update a bunch of entries.

llvm-svn: 122700
2024-10-19 11:02:59 +02:00 · 2011-01-02 18:31:38 +00:00 · 2011-01-02 18:31:38 +00:00 · 222b24e2de
commit 222b24e2de
parent 2d1c116071
2 changed files with 55 additions and 136 deletions
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2,38 +2,6 @@ Target Independent Opportunities:

 //===---------------------------------------------------------------------===//

-We should recognize idioms for add-with-carry and turn it into the appropriate
-intrinsics.  This example:
-
-unsigned add32carry(unsigned sum, unsigned x) {
- unsigned z = sum + x;
- if (sum + x < x)
-     z++;
- return z;
-}
-
-Compiles to: clang t.c -S -o - -O3 -fomit-frame-pointer -m64 -mkernel
-
-_add32carry:                            ## @add32carry
-	addl	%esi, %edi
-	cmpl	%esi, %edi
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	%edi, %eax
-	ret
-
-with clang, but to:
-
-_add32carry:
-	leal	(%rsi,%rdi), %eax
-	cmpl	%esi, %eax
-	adcl	$0, %eax
-	ret
-
-with gcc.
-
-//===---------------------------------------------------------------------===//
-
 Dead argument elimination should be enhanced to handle cases when an argument is
 dead to an externally visible function.  Though the argument can't be removed
 from the externally visible function, the caller doesn't need to pass it in.
@@ -82,6 +50,9 @@ unsigned int mul(unsigned int a,unsigned int b) {
  return a*b;
 }

+The legalization code for mul-with-overflow needs to be made more robust before
+this can be implemented, though.
+
 //===---------------------------------------------------------------------===//

 Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and
@@ -92,41 +63,6 @@ right).

 //===---------------------------------------------------------------------===//

-Solve this DAG isel folding deficiency:
-
-int X, Y;
-
-void fn1(void)
-{
-  X = X | (Y << 3);
-}
-
-compiles to
-
-fn1:
-	movl Y, %eax
-	shll $3, %eax
-	orl X, %eax
-	movl %eax, X
-	ret
-
-The problem is the store's chain operand is not the load X but rather
-a TokenFactor of the load X and load Y, which prevents the folding.
-
-There are two ways to fix this:
-
-1. The dag combiner can start using alias analysis to realize that y/x
-   don't alias, making the store to X not dependent on the load from Y.
-2. The generated isel could be made smarter in the case it can't
-   disambiguate the pointers.
-
-Number 1 is the preferred solution.
-
-This has been "fixed" by a TableGen hack. But that is a short term workaround
-which will be removed once the proper fix is made.
-
-//===---------------------------------------------------------------------===//
-
 On targets with expensive 64-bit multiply, we could LSR this:

 for (i = ...; ++i) {
@@ -339,14 +275,6 @@ unsigned long reverse(unsigned v) {
    return v ^ (t >> 8);
 }

-Neither is this (very standard idiom):
-
-int f(int n)
-{
-  return (((n) << 24) | (((n) & 0xff00) << 8) 
-       | (((n) >> 8) & 0xff00) | ((n) >> 24));
-}
-
 //===---------------------------------------------------------------------===//

 [LOOP RECOGNITION]
@@ -382,9 +310,7 @@ unsigned int popcount(unsigned int input) {
  return count;
 }

-This is a form of idiom recognition for loops, the same thing that could be
-useful for recognizing memset/memcpy.  This sort of thing should be added to the
-loop idiom pass.
+This sort of thing should be added to the loop idiom pass.

 //===---------------------------------------------------------------------===//

@@ -639,46 +565,21 @@ struct THotKey { short Key; bool Control; bool Shift; bool Alt; };
 extern THotKey m_HotKey;
 THotKey GetHotKey () { return m_HotKey; }

-into (-O3 -fno-exceptions -static -fomit-frame-pointer):
+into (-m64 -O3 -fno-exceptions -static -fomit-frame-pointer):

-__Z9GetHotKeyv:
-	pushl	%esi
-	movl	8(%esp), %eax
-	movb	_m_HotKey+3, %cl
-	movb	_m_HotKey+4, %dl
-	movb	_m_HotKey+2, %ch
-	movw	_m_HotKey, %si
-	movw	%si, (%eax)
-	movb	%ch, 2(%eax)
-	movb	%cl, 3(%eax)
-	movb	%dl, 4(%eax)
-	popl	%esi
-	ret	$4
-
-GCC produces:
-
-__Z9GetHotKeyv:
-	movl	_m_HotKey, %edx
-	movl	4(%esp), %eax
-	movl	%edx, (%eax)
-	movzwl	_m_HotKey+4, %edx
-	movw	%dx, 4(%eax)
-	ret	$4
-
-The LLVM IR contains the needed alignment info, so we should be able to 
-merge the loads and stores into 4-byte loads:
-
-	%struct.THotKey = type { i16, i8, i8, i8 }
-define void @_Z9GetHotKeyv(%struct.THotKey* sret  %agg.result) nounwind  {
-...
-	%tmp2 = load i16* getelementptr (@m_HotKey, i32 0, i32 0), align 8
-	%tmp5 = load i8* getelementptr (@m_HotKey, i32 0, i32 1), align 2
-	%tmp8 = load i8* getelementptr (@m_HotKey, i32 0, i32 2), align 1
-	%tmp11 = load i8* getelementptr (@m_HotKey, i32 0, i32 3), align 2
-
-Alternatively, we should use a small amount of base-offset alias analysis
-to make it so the scheduler doesn't need to hold all the loads in regs at
-once.
+__Z9GetHotKeyv:                         ## @_Z9GetHotKeyv
+	movq	_m_HotKey@GOTPCREL(%rip), %rax
+	movzwl	(%rax), %ecx
+	movzbl	2(%rax), %edx
+	shlq	$16, %rdx
+	orq	%rcx, %rdx
+	movzbl	3(%rax), %ecx
+	shlq	$24, %rcx
+	orq	%rdx, %rcx
+	movzbl	4(%rax), %eax
+	shlq	$32, %rax
+	orq	%rcx, %rax
+	ret

 //===---------------------------------------------------------------------===//

@@ -764,20 +665,6 @@ etc.  On X86, we miss a bunch of 'rotate by variable' cases because the rotate
 matching code in dag combine doesn't look through truncates aggressively 
 enough.  Here are some testcases reduced from GCC PR17886:

-unsigned long long f(unsigned long long x, int y) {
-  return (x << y) | (x >> 64-y); 
-} 
-unsigned f2(unsigned x, int y){
-  return (x << y) | (x >> 32-y); 
-} 
-unsigned long long f3(unsigned long long x){
-  int y = 9;
-  return (x << y) | (x >> 64-y); 
-} 
-unsigned f4(unsigned x){
-  int y = 10;
-  return (x << y) | (x >> 32-y); 
-}
 unsigned long long f5(unsigned long long x, unsigned long long y) {
  return (x << 8) | ((y >> 48) & 0xffull);
 }
@@ -796,11 +683,6 @@ unsigned long long f6(unsigned long long x, unsigned long long y, int z) {
  }
 }

-On X86-64, we only handle f2/f3/f4 right.  On x86-32, a few of these 
-generate truly horrible code, instead of using shld and friends.  On
-ARM, we end up with calls to L___lshrdi3/L___ashldi3 in f, which is
-badness.  PPC64 misses f, f5 and f6.  CellSPU aborts in isel.
-
 //===---------------------------------------------------------------------===//

 This (and similar related idioms):
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1507,6 +1507,8 @@ loop, the value comes into the loop as two values, and
 RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the
 constructed BUILD_PAIR which represents the cast value.

+This can be handled by making CodeGenPrepare sink the cast.
+
 //===---------------------------------------------------------------------===//

 Test instructions can be eliminated by using EFLAGS values from arithmetic
@@ -1847,3 +1849,38 @@ _foo:
 0 is the only unsigned number < 1.

 //===---------------------------------------------------------------------===//
+
+This code:
+
+%0 = type { i32, i1 }
+
+define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+  %uadd = tail call %0 @llvm.uadd.with.overflow.i32(i32 %sum, i32 %x)
+  %cmp = extractvalue %0 %uadd, 1
+  %inc = zext i1 %cmp to i32
+  %add = add i32 %x, %sum
+  %z.0 = add i32 %add, %inc
+  ret i32 %z.0
+}
+
+declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+
+compiles to:
+
+_add32carry:                            ## @add32carry
+	addl	%esi, %edi
+	sbbl	%ecx, %ecx
+	movl	%edi, %eax
+	subl	%ecx, %eax
+	ret
+
+But it could be:
+
+_add32carry:
+	leal	(%rsi,%rdi), %eax
+	cmpl	%esi, %eax
+	adcl	$0, %eax
+	ret
+
+//===---------------------------------------------------------------------===//