X86: Try to use a smaller encoding by transforming (X << C1) & C2 into (X & (C2 >> C1)) << C1. (Part of PR5039)

This tends to happen a lot with bitfield code generated by clang. A simple x86_64 example is

uint64_t foo(uint64_t x) { return (x & 1) << 42; }

which used to compile into bloated code:

  shlq    $42, %rdi              ## encoding: [0x48,0xc1,0xe7,0x2a]
  movabsq $4398046511104, %rax   ## encoding: [0x48,0xb8,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00]
  andq    %rdi, %rax             ## encoding: [0x48,0x21,0xf8]
  ret                            ## encoding: [0xc3]

With this patch we can fold the immediate into the 'and':

  andq    $1, %rdi               ## encoding: [0x48,0x83,0xe7,0x01]
  movq    %rdi, %rax             ## encoding: [0x48,0x89,0xf8]
  shlq    $42, %rax              ## encoding: [0x48,0xc1,0xe0,0x2a]
  ret                            ## encoding: [0xc3]

It is possible to save another byte by using 'andl' instead of 'andq', but I currently see no way of doing that without making this code even more complicated. See the TODOs in the code.

llvm-svn: 129990
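As a quick sanity check of the rewrite described above, here is a minimal standalone C sketch; it is not part of the FileCheck test that follows, and the helper names and sample values are invented for illustration. It asserts that masking before the shift produces the same value as shifting and then masking, for the constants from the commit message and from test4/test7 below; the analogous 'or' rewrite additionally needs the low C1 bits of C2 to be zero, which holds for every constant in this file.

/* check_narrow_shl.c - hypothetical standalone verifier, not part of the test suite. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Original shape: shift first, then apply the (possibly huge) mask. */
static uint64_t and_shift_first(uint64_t x, unsigned c1, uint64_t c2) {
  return (x << c1) & c2;
}

/* Rewritten shape: apply the narrowed mask first, then shift.
 * Always equivalent, because the low c1 bits of (x << c1) are zero anyway. */
static uint64_t and_mask_first(uint64_t x, unsigned c1, uint64_t c2) {
  return (x & (c2 >> c1)) << c1;
}

/* Same idea for 'or'; here the low c1 bits of c2 must be zero, as they are for
 * every constant below (e.g. 31744 == 31 << 10, 264982302294016 == 241 << 40). */
static uint64_t or_shift_first(uint64_t x, unsigned c1, uint64_t c2) {
  return (x << c1) | c2;
}
static uint64_t or_mask_first(uint64_t x, unsigned c1, uint64_t c2) {
  return (x | (c2 >> c1)) << c1;
}

int main(void) {
  const uint64_t samples[] = { 0, 1, 0x2a, 0x123456789abcdef0ull, ~0ull };
  for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; ++i) {
    uint64_t x = samples[i];
    /* foo() from the commit message: c1 = 42, c2 = 1 << 42. */
    assert(and_shift_first(x, 42, 1ull << 42) == and_mask_first(x, 42, 1ull << 42));
    /* test4 below: c1 = 40, c2 = 264982302294016 == 241 << 40. */
    assert(and_shift_first(x, 40, 241ull << 40) == and_mask_first(x, 40, 241ull << 40));
    /* test7 below: same constants with 'or'. */
    assert(or_shift_first(x, 40, 241ull << 40) == or_mask_first(x, 40, 241ull << 40));
  }
  puts("shift/mask identities hold for the sampled inputs");
  return 0;
}

Compiling and running this sketch (for example, cc check_narrow_shl.c && ./a.out) should print the success line; an assertion failure would mean a constant does not satisfy the rewrite's precondition.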
; RUN: llc < %s -march=x86-64 | FileCheck %s
; PR5039

define i32 @test1(i32 %x) nounwind {
  %and = shl i32 %x, 10
  %shl = and i32 %and, 31744
  ret i32 %shl
; CHECK-LABEL: test1:
; CHECK: andl $31
; CHECK: shll $10
}

define i32 @test2(i32 %x) nounwind {
  %or = shl i32 %x, 10
  %shl = or i32 %or, 31744
  ret i32 %shl
; CHECK-LABEL: test2:
; CHECK: orl $31
; CHECK: shll $10
}

define i32 @test3(i32 %x) nounwind {
  %xor = shl i32 %x, 10
  %shl = xor i32 %xor, 31744
  ret i32 %shl
; CHECK-LABEL: test3:
; CHECK: xorl $31
; CHECK: shll $10
}

define i64 @test4(i64 %x) nounwind {
  %and = shl i64 %x, 40
  %shl = and i64 %and, 264982302294016
  ret i64 %shl
; CHECK-LABEL: test4:
; CHECK: andq $241
; CHECK: shlq $40
}

define i64 @test5(i64 %x) nounwind {
  %and = shl i64 %x, 40
  %shl = and i64 %and, 34084860461056
  ret i64 %shl
; CHECK-LABEL: test5:
; CHECK: andq $31
; CHECK: shlq $40
}

define i64 @test6(i64 %x) nounwind {
  %and = shl i64 %x, 32
  %shl = and i64 %and, -281474976710656
  ret i64 %shl
; CHECK-LABEL: test6:
; CHECK: andq $-65536
; CHECK: shlq $32
}

define i64 @test7(i64 %x) nounwind {
  %or = shl i64 %x, 40
  %shl = or i64 %or, 264982302294016
  ret i64 %shl
; CHECK-LABEL: test7:
; CHECK: orq $241
; CHECK: shlq $40
}

define i64 @test8(i64 %x) nounwind {
  %or = shl i64 %x, 40
  %shl = or i64 %or, 34084860461056
  ret i64 %shl
; CHECK-LABEL: test8:
; CHECK: orq $31
; CHECK: shlq $40
}

define i64 @test9(i64 %x) nounwind {
  %xor = shl i64 %x, 40
  %shl = xor i64 %xor, 264982302294016
  ret i64 %shl
; CHECK-LABEL: test9:
; CHECK: orq $241
; CHECK: shlq $40
}

define i64 @test10(i64 %x) nounwind {
  %xor = shl i64 %x, 40
  %shl = xor i64 %xor, 34084860461056
  ret i64 %shl
; CHECK-LABEL: test10:
; CHECK: xorq $31
; CHECK: shlq $40
}

define i64 @test11(i64 %x) nounwind {
  %xor = shl i64 %x, 33
  %shl = xor i64 %xor, -562949953421312
  ret i64 %shl
; CHECK-LABEL: test11:
; CHECK: xorq $-65536
; CHECK: shlq $33
}

; PR23098
define i32 @test12(i32 %x, i32* %y) nounwind {
  %and = shl i32 %x, 1
  %shl = and i32 %and, 255
  store i32 %shl, i32* %y
  ret i32 %shl
; CHECK-LABEL: test12:
; CHECK: andl $127
; CHECK-NEXT: addl
; CHECK-NOT: shl
}

define i64 @test13(i64 %x, i64* %y) nounwind {
  %and = shl i64 %x, 1
  %shl = and i64 %and, 255
  store i64 %shl, i64* %y
  ret i64 %shl
; CHECK-LABEL: test13:
; CHECK: andq $127
; CHECK-NEXT: addq
; CHECK-NOT: shl
}