llvm-mirror/test/Transforms/InstCombine/bitcast.ll

; RUN: opt < %s -instcombine -S | FileCheck %s

; Bitcasts between vectors and scalars are valid.
; PR4487
define i32 @test1(i64 %a) {
; CHECK: @test1
; CHECK: ret i32 0
  ; The same scalar is reinterpreted as a two-element vector twice; xor-ing the
  ; two identical vectors and reading lane 0 is expected to fold to zero.
  %lhs = bitcast i64 %a to <2 x i32>
  %rhs = bitcast i64 %a to <2 x i32>
  %diff = xor <2 x i32> %lhs, %rhs
  %lane0 = extractelement <2 x i32> %diff, i32 0
  ret i32 %lane0
}

; Optimize bitcasts that are extracting the low element of a vector.  This
; happens because of SRoA.
; rdar://7892780
define float @test2(<2 x float> %A, <2 x i32> %B) {
  ; View %A as one i64, keep its low 32 bits, and reinterpret them as a float.
  ; The directives below expect this chain to become an extractelement of
  ; index 0 from %A.
  %tmp28 = bitcast <2 x float> %A to i64
  %tmp23 = trunc i64 %tmp28 to i32
  %tmp24 = bitcast i32 %tmp23 to float

  ; Same chain starting from an integer vector.  Here the expected output
  ; first bitcasts %B to <2 x float> and then extracts index 0.
  %tmp = bitcast <2 x i32> %B to i64
  %tmp2 = trunc i64 %tmp to i32
  %tmp4 = bitcast i32 %tmp2 to float

  ; Use both results so neither chain is dead.
  %add = fadd float %tmp24, %tmp4
  ret float %add

; The expected output keeps the names %tmp24, %tmp4 and %add, so those value
; names must not be changed in the input above.
; CHECK: @test2
; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 0
; CHECK-NEXT:  bitcast <2 x i32> %B to <2 x float>
; CHECK-NEXT:  %tmp4 = extractelement <2 x float> {{.*}}, i32 0
; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
; CHECK-NEXT:  ret float %add
}

; Optimize bitcasts that are extracting other elements of a vector.  This
; happens because of SRoA.
; rdar://7892780
define float @test3(<2 x float> %A, <2 x i64> %B) {
  ; View %A as one i64, shift right by 32 and truncate to i32, i.e. select
  ; bits 32..63, then reinterpret them as a float.  The directives below
  ; expect this to become an extractelement of index 1 from %A.
  %tmp28 = bitcast <2 x float> %A to i64
  %tmp29 = lshr i64 %tmp28, 32
  %tmp23 = trunc i64 %tmp29 to i32
  %tmp24 = bitcast i32 %tmp23 to float

  ; View %B as one i128, shift right by 64 and truncate to i32, i.e. select
  ; bits 64..95.  The expected output bitcasts %B to <4 x float> and extracts
  ; index 2.
  %tmp = bitcast <2 x i64> %B to i128
  %tmp1 = lshr i128 %tmp, 64
  %tmp2 = trunc i128 %tmp1 to i32
  %tmp4 = bitcast i32 %tmp2 to float

  ; Use both results so neither chain is dead.
  %add = fadd float %tmp24, %tmp4
  ret float %add

; The expected output keeps the names %tmp24, %tmp4 and %add, so those value
; names must not be changed in the input above.
; CHECK: @test3
; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 1
; CHECK-NEXT:  bitcast <2 x i64> %B to <4 x float>
; CHECK-NEXT:  %tmp4 = extractelement <4 x float> {{.*}}, i32 2
; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
; CHECK-NEXT:  ret float %add
}


define <2 x i32> @test4(i32 %A, i32 %B){
  ; CHECK: @test4
  ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0
  ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1
  ; CHECK-NEXT: ret <2 x i32>

  ; Build an i64 with %A in the low 32 bits and %B in the high 32 bits, then
  ; reinterpret it as <2 x i32>.  The expected output inserts %A at index 0 and
  ; %B at index 1 instead of going through the wide integer.
  %lo = zext i32 %A to i64
  %hi = zext i32 %B to i64
  %hi.shifted = shl i64 %hi, 32
  %packed = or i64 %hi.shifted, %lo
  %vec = bitcast i64 %packed to <2 x i32>
  ret <2 x i32> %vec
}

; rdar://8360454
define <2 x float> @test5(float %A, float %B) {
  ; CHECK: @test5
  ; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
  ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1
  ; CHECK-NEXT: ret <2 x float>

  ; Float variant of the packing pattern: each float is reinterpreted as i32
  ; and widened to i64, %B is shifted into the high half, the halves are or-ed
  ; together, and the i64 is reinterpreted as <2 x float>.  The expected output
  ; inserts %A at index 0 and %B at index 1 directly.
  %a.bits = bitcast float %A to i32
  %a.wide = zext i32 %a.bits to i64
  %b.bits = bitcast float %B to i32
  %b.wide = zext i32 %b.bits to i64
  %b.high = shl i64 %b.wide, 32
  %packed = or i64 %b.high, %a.wide
  %vec = bitcast i64 %packed to <2 x float>
  ret <2 x float> %vec
}

define <2 x float> @test6(float %A){
  ; Constant case of the packing pattern: %A is placed in the high 32 bits of
  ; an i64 whose low 32 bits are the constant 1109917696 (0x42280000), and the
  ; i64 is reinterpreted as <2 x float>.  The expected output starts from a
  ; constant vector holding float 42.0 at index 0 and inserts %A at index 1.
  %tmp23 = bitcast float %A to i32
  %tmp24 = zext i32 %tmp23 to i64
  %tmp25 = shl i64 %tmp24, 32
  %mask20 = or i64 %tmp25, 1109917696
  %tmp35 = bitcast i64 %mask20 to <2 x float>
  ret <2 x float> %tmp35
; The return is pinned to the line right after the insertelement, matching
; test4 and test5, so leftover instructions before the return are detected.
; CHECK: @test6
; CHECK-NEXT: insertelement <2 x float> <float 4.200000e+01, float undef>, float %A, i32 1
; CHECK-NEXT: ret <2 x float>
}
filecheckize llvm-svn: 112225 2010-08-26 23:51:41 +02:00			`; RUN: opt < %s -instcombine -S \| FileCheck %s`
Fix an instcombine abort on a scalar-to-vector bitcast. This fixes PR4487. llvm-svn: 74646 2009-07-01 23:38:46 +02:00
filecheckize llvm-svn: 112225 2010-08-26 23:51:41 +02:00			`; Bitcasts between vectors and scalars are valid.`
			`; PR4487`
			`define i32 @test1(i64 %a) {`
Fix an instcombine abort on a scalar-to-vector bitcast. This fixes PR4487. llvm-svn: 74646 2009-07-01 23:38:46 +02:00			`%t1 = bitcast i64 %a to <2 x i32>`
			`%t2 = bitcast i64 %a to <2 x i32>`
			`%t3 = xor <2 x i32> %t1, %t2`
			`%t4 = extractelement <2 x i32> %t3, i32 0`
			`ret i32 %t4`
filecheckize llvm-svn: 112225 2010-08-26 23:51:41 +02:00
			`; CHECK: @test1`
			`; CHECK: ret i32 0`
Fix an instcombine abort on a scalar-to-vector bitcast. This fixes PR4487. llvm-svn: 74646 2009-07-01 23:38:46 +02:00			`}`

optimize bitcast(trunc(bitcast(x))) where the result is a float and 'x' is a vector to be a vector element extraction. This allows clang to compile: struct S { float A, B, C, D; }; float foo(struct S A) { return A.A + A.B+A.C+A.D; } into: _foo: ## @foo ## BB#0: ## %entry movd %xmm0, %rax shrq $32, %rax movd %eax, %xmm2 addss %xmm0, %xmm2 movapd %xmm1, %xmm3 addss %xmm2, %xmm3 movd %xmm1, %rax shrq $32, %rax movd %eax, %xmm0 addss %xmm3, %xmm0 ret instead of: _foo: ## @foo ## BB#0: ## %entry movd %xmm0, %rax movd %eax, %xmm0 shrq $32, %rax movd %eax, %xmm2 addss %xmm0, %xmm2 movd %xmm1, %rax movd %eax, %xmm1 addss %xmm2, %xmm1 shrq $32, %rax movd %eax, %xmm0 addss %xmm1, %xmm0 ret ... eliminating half of the horribleness. llvm-svn: 112227 2010-08-26 23:55:42 +02:00			`; Optimize bitcasts that are extracting low element of vector. This happens`
			`; because of SRoA.`
			`; rdar://7892780`
			`define float @test2(<2 x float> %A, <2 x i32> %B) {`
			`%tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2]`
			`%tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1]`
			`%tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1]`

			`%tmp = bitcast <2 x i32> %B to i64`
			`%tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1]`
			`%tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1]`

			`%add = fadd float %tmp24, %tmp4`
			`ret float %add`

			`; CHECK: @test2`
			`; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0`
			`; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float>`
			`; CHECK-NEXT: %tmp4 = extractelement <2 x float> {{.*}}, i32 0`
			`; CHECK-NEXT: %add = fadd float %tmp24, %tmp4`
			`; CHECK-NEXT: ret float %add`
			`}`
optimize "integer extraction out of the middle of a vector" as produced by SRoA. This is part of rdar://7892780, but needs another xform to expose this. llvm-svn: 112232 2010-08-27 00:14:59 +02:00
			`; Optimize bitcasts that are extracting other elements of a vector. This`
			`; happens because of SRoA.`
			`; rdar://7892780`
			`define float @test3(<2 x float> %A, <2 x i64> %B) {`
			`%tmp28 = bitcast <2 x float> %A to i64`
			`%tmp29 = lshr i64 %tmp28, 32`
			`%tmp23 = trunc i64 %tmp29 to i32`
			`%tmp24 = bitcast i32 %tmp23 to float`

			`%tmp = bitcast <2 x i64> %B to i128`
			`%tmp1 = lshr i128 %tmp, 64`
			`%tmp2 = trunc i128 %tmp1 to i32`
			`%tmp4 = bitcast i32 %tmp2 to float`

			`%add = fadd float %tmp24, %tmp4`
			`ret float %add`

			`; CHECK: @test3`
			`; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 1`
			`; CHECK-NEXT: bitcast <2 x i64> %B to <4 x float>`
			`; CHECK-NEXT: %tmp4 = extractelement <4 x float> {{.*}}, i32 2`
			`; CHECK-NEXT: %add = fadd float %tmp24, %tmp4`
			`; CHECK-NEXT: ret float %add`
			`}`
optimize bitcasts from large integers to vector into vector element insertion from the pieces that feed into the vector. This handles a pattern that occurs frequently due to code generated for the x86-64 abi. We now compile something like this: struct S { float A, B, C, D; }; struct S g; struct S bar() { struct S A = g; ++A.A; ++A.C; return A; } into all nice vector operations: _bar: ## @bar ## BB#0: ## %entry movq _g@GOTPCREL(%rip), %rax movss LCPI1_0(%rip), %xmm1 movss (%rax), %xmm0 addss %xmm1, %xmm0 pshufd $16, %xmm0, %xmm0 movss 4(%rax), %xmm2 movss 12(%rax), %xmm3 pshufd $16, %xmm2, %xmm2 unpcklps %xmm2, %xmm0 addss 8(%rax), %xmm1 pshufd $16, %xmm1, %xmm1 pshufd $16, %xmm3, %xmm2 unpcklps %xmm2, %xmm1 ret instead of icky integer operations: _bar: ## @bar movq _g@GOTPCREL(%rip), %rax movss LCPI1_0(%rip), %xmm1 movss (%rax), %xmm0 addss %xmm1, %xmm0 movd %xmm0, %ecx movl 4(%rax), %edx movl 12(%rax), %esi shlq $32, %rdx addq %rcx, %rdx movd %rdx, %xmm0 addss 8(%rax), %xmm1 movd %xmm1, %eax shlq $32, %rsi addq %rax, %rsi movd %rsi, %xmm1 ret This resolves rdar://8360454 llvm-svn: 112343 2010-08-28 03:20:38 +02:00

			`define <2 x i32> @test4(i32 %A, i32 %B){`
			`%tmp38 = zext i32 %A to i64`
			`%tmp32 = zext i32 %B to i64`
			`%tmp33 = shl i64 %tmp32, 32`
			`%ins35 = or i64 %tmp33, %tmp38`
			`%tmp43 = bitcast i64 %ins35 to <2 x i32>`
			`ret <2 x i32> %tmp43`
			`; CHECK: @test4`
			`; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0`
			`; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1`
			`; CHECK-NEXT: ret <2 x i32>`

			`}`

			`; rdar://8360454`
			`define <2 x float> @test5(float %A, float %B) {`
			`%tmp37 = bitcast float %A to i32`
			`%tmp38 = zext i32 %tmp37 to i64`
			`%tmp31 = bitcast float %B to i32`
			`%tmp32 = zext i32 %tmp31 to i64`
			`%tmp33 = shl i64 %tmp32, 32`
			`%ins35 = or i64 %tmp33, %tmp38`
			`%tmp43 = bitcast i64 %ins35 to <2 x float>`
			`ret <2 x float> %tmp43`
			`; CHECK: @test5`
			`; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0`
			`; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1`
			`; CHECK-NEXT: ret <2 x float>`
			`}`
handle the constant case of vector insertion. For something like this: struct S { float A, B, C, D; }; struct S g; struct S bar() { struct S A = g; ++A.B; A.A = 42; return A; } we now generate: _bar: ## @bar ## BB#0: ## %entry movq _g@GOTPCREL(%rip), %rax movss 12(%rax), %xmm0 pshufd $16, %xmm0, %xmm0 movss 4(%rax), %xmm2 movss 8(%rax), %xmm1 pshufd $16, %xmm1, %xmm1 unpcklps %xmm0, %xmm1 addss LCPI1_0(%rip), %xmm2 pshufd $16, %xmm2, %xmm2 movss LCPI1_1(%rip), %xmm0 pshufd $16, %xmm0, %xmm0 unpcklps %xmm2, %xmm0 ret instead of: _bar: ## @bar ## BB#0: ## %entry movq _g@GOTPCREL(%rip), %rax movss 12(%rax), %xmm0 pshufd $16, %xmm0, %xmm0 movss 4(%rax), %xmm2 movss 8(%rax), %xmm1 pshufd $16, %xmm1, %xmm1 unpcklps %xmm0, %xmm1 addss LCPI1_0(%rip), %xmm2 movd %xmm2, %eax shlq $32, %rax addq $1109917696, %rax ## imm = 0x42280000 movd %rax, %xmm0 ret llvm-svn: 112345 2010-08-28 03:50:57 +02:00
			`define <2 x float> @test6(float %A){`
			`%tmp23 = bitcast float %A to i32 ; <i32> [#uses=1]`
			`%tmp24 = zext i32 %tmp23 to i64 ; <i64> [#uses=1]`
			`%tmp25 = shl i64 %tmp24, 32 ; <i64> [#uses=1]`
			`%mask20 = or i64 %tmp25, 1109917696 ; <i64> [#uses=1]`
			`%tmp35 = bitcast i64 %mask20 to <2 x float> ; <<2 x float>> [#uses=1]`
			`ret <2 x float> %tmp35`
			`; CHECK: @test6`
			`; CHECK-NEXT: insertelement <2 x float> <float 4.200000e+01, float undef>, float %A, i32 1`
			`; CHECK: ret`
			`}`