; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
; A pair of DAG combines turns mulhi/mullo into a wider multiply when the
; wider type is legal (llvm-svn: 121696). This lets us compile @test1 below
; (udiv of an i16 by 33) into:
;   test1:                        # @test1
;     movzwl 4(%esp), %eax
;     imull  $63551, %eax, %eax   # imm = 0xF83F
;     shrl   $21, %eax
;     ret
; instead of:
;   test1:                        # @test1
;     movw   $-1985, %ax          # imm = 0xFFFFFFFFFFFFF83F
;     mulw   4(%esp)
;     andl   $65504, %edx         # imm = 0xFFE0
;     movl   %edx, %eax
;     shrl   $5, %eax
;     ret
; Implements rdar://8760399 and example #4 from
; http://blog.regehr.org/archives/320. The same combine should eventually be
; added for [su]mul_hilo as well.
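;
; A minimal IR-level sketch of the widened form the combine aims for (the
; function name @udiv33_widen_sketch is assumed for illustration and is not
; checked by FileCheck): zero-extend to i32, multiply by the magic constant
; 63551 = ceil(2^21 / 33), and shift right by 21. Since 63551 * 33 = 2^21 + 31
; and 31 < 2^5, (x * 63551) >> 21 equals x / 33 for every 16-bit unsigned x.
define zeroext i16 @udiv33_widen_sketch(i16 zeroext %x) nounwind {
entry:
  %ext = zext i16 %x to i32     ; widen to the legal type
  %mul = mul i32 %ext, 63551    ; 63551 = ceil(2^21 / 33)
  %shr = lshr i32 %mul, 21      ; floor(x * 63551 / 2^21) == floor(x / 33)
  %res = trunc i32 %shr to i16
  ret i16 %res
}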
define zeroext i16 @test1(i16 zeroext %x) nounwind {
entry:
%div = udiv i16 %x, 33
ret i16 %div
; CHECK: test1:
; CHECK: imull $63551, %eax, %eax
; CHECK-NEXT: shrl $21, %eax
; CHECK-NEXT: ret
}
define zeroext i16 @test2(i8 signext %x, i16 zeroext %c) nounwind readnone ssp noredzone {
entry:
%div = udiv i16 %c, 3
ret i16 %div
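; 43691 = ceil(2^17 / 3); since 43691 * 3 = 2^17 + 1, (c * 43691) >> 17 equals
; c / 3 for every 16-bit unsigned c, which is the imull/shrl pair checked below.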
; CHECK: test2:
; CHECK: imull $43691, %eax, %eax
; CHECK-NEXT: shrl $17, %eax
; CHECK-NEXT: ret
}
define zeroext i8 @test3(i8 zeroext %x, i8 zeroext %c) nounwind readnone ssp noredzone {
entry:
%div = udiv i8 %c, 3
ret i8 %div
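; 171 = ceil(2^9 / 3); since 171 * 3 = 2^9 + 1, (c * 171) >> 9 equals c / 3 for
; every 8-bit unsigned c once c has been zero-extended into a 32-bit register.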
; CHECK: test3:
; CHECK: movzbl 8(%esp), %eax
; CHECK-NEXT: imull $171, %eax, %eax
; CHECK-NEXT: shrl $9, %eax
; CHECK-NEXT: ret
}
define signext i16 @test4(i16 signext %x) nounwind {
entry:
%div = sdiv i16 %x, 33 ; <i16> [#uses=1]
ret i16 %div
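; Signed case: 1986 = ceil(2^16 / 33). The quotient is the high 16 bits of the
; 32-bit product x * 1986, corrected by one for negative x so that the
; division truncates toward zero.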
; CHECK: test4:
; CHECK: imull $1986, %eax, %
}
define i32 @test5(i32 %A) nounwind {
%tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1]
ret i32 %tmp1
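; 365384439 is the magic multiplier chosen for the divisor 1577682821; the
; quotient is derived from the high half of the 64-bit product, which mull
; leaves in %edx.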
; CHECK: test5:
; CHECK: movl $365384439, %eax
; CHECK: mull 4(%esp)
}
; DAGCombine folds (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1+c2))
; when c1 equals the number of bits that are truncated off (llvm-svn: 124559).
; This happens all the time when an smul is promoted to a larger type. On
; x86-64, "int test(int x) { return x/10; }" now compiles to
;   movslq %edi, %rax
;   imulq  $1717986919, %rax, %rax
;   movq   %rax, %rcx
;   shrq   $63, %rcx
;   sarq   $34, %rax            <- used to be "shrq $32, %rax; sarl $2, %eax"
;   addl   %ecx, %eax
; This fires 96 times in gcc.c on x86-64.
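;
; A minimal IR-level sketch of the pattern the fold targets (the function name
; is assumed for illustration and is not checked by FileCheck; the i64/i32
; widths are taken from the x86-64 example above):
define i32 @sra_trunc_srl_sketch(i64 %x) nounwind {
entry:
  %hi  = lshr i64 %x, 32        ; c1 = 32, exactly the bits truncated off below
  %t   = trunc i64 %hi to i32
  %res = ashr i32 %t, 2         ; c2 = 2
  ; the combine rewrites the equivalent DAG as (trunc (sra x, 34)), c1 + c2 = 34
  ret i32 %res
}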
define signext i16 @test6(i16 signext %x) nounwind {
entry:
%div = sdiv i16 %x, 10
ret i16 %div
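; 26215 = ceil(2^18 / 10): x is widened to i32 and multiplied by 26215, then
; the sign bit (shrl $31) is added to the arithmetically shifted product
; (sarl $18) so that the division truncates toward zero.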
; CHECK: test6:
; CHECK: imull $26215, %eax, %eax
; CHECK: shrl $31, %ecx
; CHECK: sarl $18, %eax
}
; BuildUDIV: if the divisor is even we can simplify the fixup of the
; multiplied value by introducing an early shift (llvm-svn: 127829). On x86-64
; this allows us to compile "unsigned foo(unsigned x) { return x/28; }" into
;   shrl   $2, %edi
;   imulq  $613566757, %rdi, %rax
;   shrq   $32, %rax
;   ret
; instead of:
;   movl   %edi, %eax
;   imulq  $613566757, %rax, %rcx
;   shrq   $32, %rcx
;   subl   %ecx, %eax
;   shrl   %eax
;   addl   %ecx, %eax
;   shrl   $4, %eax
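;
; A minimal IR-level sketch of that strategy for @test7 below (the function
; name is assumed for illustration and is not checked by FileCheck): since
; 28 = 4 * 7, shift out the power of two first, then divide by 7 with the
; magic constant 613566757 = ceil(2^32 / 7). Because 613566757 * 7 = 2^32 + 3,
; the high half of the 64-bit product is the exact quotient for any value
; that fits in 30 bits after the pre-shift.
define i32 @udiv28_sketch(i32 %x) nounwind {
entry:
  %q4   = lshr i32 %x, 2              ; x / 4, leaves at most 30 significant bits
  %wide = zext i32 %q4 to i64
  %mul  = mul i64 %wide, 613566757    ; 613566757 = ceil(2^32 / 7)
  %hi   = lshr i64 %mul, 32           ; high half of the product = (x / 4) / 7
  %res  = trunc i64 %hi to i32
  ret i32 %res
}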
define i32 @test7(i32 %x) nounwind {
%div = udiv i32 %x, 28
ret i32 %div
; CHECK: test7:
; CHECK: shrl $2
; CHECK: movl $613566757
; CHECK: mull
; CHECK-NOT: shrl
; CHECK: ret
}