diff --git a/Utilities/types.h b/Utilities/types.h
index caafe33158..6f57081444 100644
--- a/Utilities/types.h
+++ b/Utilities/types.h
@@ -974,45 +974,92 @@ inline void busy_wait(std::size_t count = 100)
 // Rotate helpers
 #if defined(__GNUG__)
 
-inline u8 rol8(const u8 x, const u8 n)
+inline u8 rol8(u8 x, u8 n)
 {
 	u8 result = x;
 	__asm__("rolb %[n], %[result]" : [result] "+g" (result) : [n] "c" (n));
 	return result;
 }
 
-inline u16 rol16(const u16 x, const u16 n)
+// 8-bit rotate right (x86 RORB, count passed in CL)
+inline u8 ror8(u8 x, u8 n)
+{
+	u8 result = x;
+	__asm__("rorb %[n], %[result]" : [result] "+g" (result) : [n] "c" (n));
+	return result;
+}
+
+inline u16 rol16(u16 x, u16 n)
 {
 	u16 result = x;
 	__asm__("rolw %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n));
 	return result;
 }
 
-inline u32 rol32(const u32 x, const u32 n)
+// 16-bit rotate right (x86 RORW, count passed in CL)
+inline u16 ror16(u16 x, u16 n)
+{
+	u16 result = x;
+	__asm__("rorw %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n));
+	return result;
+}
+
+inline u32 rol32(u32 x, u32 n)
 {
 	u32 result = x;
 	__asm__("roll %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n));
 	return result;
 }
 
-inline u64 rol64(const u64 x, const u64 n)
+// 32-bit rotate right (x86 RORL, count passed in CL)
+inline u32 ror32(u32 x, u32 n)
+{
+	u32 result = x;
+	__asm__("rorl %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n));
+	return result;
+}
+
+inline u64 rol64(u64 x, u64 n)
 {
 	u64 result = x;
 	__asm__("rolq %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n));
 	return result;
 }
 
-inline u64 ror64(const u64 x, const u64 n)
+inline u64 ror64(u64 x, u64 n)
 {
 	u64 result = x;
 	__asm__("rorq %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n));
 	return result;
 }
 
+// Upper 64 bits of the unsigned 128-bit product a * b (read back from RDX)
+inline u64 umulh64(u64 a, u64 b)
+{
+	u64 result;
+	// One-operand MULQ writes RDX:RAX, so RAX must be a read-write operand ("+a"), not input-only
+	__asm__("mulq %[b]" : "=d" (result), [a] "+a" (a) : [b] "rm" (b) : "cc");
+	return result;
+}
+
+// Upper 64 bits of the signed 128-bit product a * b (read back from RDX)
+inline s64 mulh64(s64 a, s64 b)
+{
+	s64 result;
+	// One-operand IMULQ writes RDX:RAX, so RAX must be a read-write operand ("+a"), not input-only
+	__asm__("imulq %[b]" : "=d" (result), [a] "+a" (a) : [b] "rm" (b) : "cc");
+	return result;
+}
+
 #elif defined(_MSC_VER)
-inline u8 rol8(const u8 x, const u8 n) { return _rotl8(x, n); }
-inline u16 rol16(const u16 x, const u16 n) { return _rotl16(x, (u8)n); }
-inline u32 rol32(const u32 x, const u32 n) { return _rotl(x, (int)n); }
-inline u64 rol64(const u64 x, const u64 n) { return _rotl64(x, (int)n); }
-inline u64 ror64(const u64 x, const u64 n) { return _rotr64(x, (int)n); }
+inline u8 rol8(u8 x, u8 n) { return _rotl8(x, n); }
+inline u8 ror8(u8 x, u8 n) { return _rotr8(x, n); }
+inline u16 rol16(u16 x, u16 n) { return _rotl16(x, (u8)n); }
+inline u16 ror16(u16 x, u16 n) { return _rotr16(x, (u8)n); }
+inline u32 rol32(u32 x, u32 n) { return _rotl(x, (int)n); }
+inline u32 ror32(u32 x, u32 n) { return _rotr(x, (int)n); }
+inline u64 rol64(u64 x, u64 n) { return _rotl64(x, (int)n); }
+inline u64 ror64(u64 x, u64 n) { return _rotr64(x, (int)n); }
+inline u64 umulh64(u64 x, u64 y) { return __umulh(x, y); }
+inline s64 mulh64(s64 x, s64 y) { return __mulh(x, y); }
 #endif