From b6e6dd63adcb86cd6f35fa480f3f0aad1b25d952 Mon Sep 17 00:00:00 2001 From: John McCall Date: Sun, 28 Feb 2010 09:55:58 +0000 Subject: [PATCH] Add an override to StringRef::getAsInteger which parses into an APInt. It gets its own implementation totally divorced from the (presumably performance-sensitive) routines which parse into a uint64_t. Add APInt::operator|=(uint64_t), which is situationally much better than using a full APInt. llvm-svn: 97381 --- include/llvm/ADT/APInt.h | 25 +++++++++ include/llvm/ADT/StringRef.h | 14 +++++ lib/Support/StringRef.cpp | 105 +++++++++++++++++++++++++++++++---- 3 files changed, 132 insertions(+), 12 deletions(-) diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index ea940ad1783..3f67ffb4556 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -150,7 +150,17 @@ class APInt { return isSingleWord() ? VAL : pVal[whichWord(bitPosition)]; } + /// Converts a string into a number. The string must be non-empty + /// and well-formed as a number of the given base. The bit-width + /// must be sufficient to hold the result. + /// /// This is used by the constructors that take string arguments. + /// + /// StringRef::getAsInteger is superficially similar but (1) does + /// not assume that the string is well-formed and (2) grows the + /// result to hold the input. + /// + /// @param radix 2, 8, 10, or 16 /// @brief Convert a char array into an APInt void fromString(unsigned numBits, const StringRef &str, uint8_t radix); @@ -571,6 +581,21 @@ public: /// @brief Bitwise OR assignment operator. APInt& operator|=(const APInt& RHS); + /// Performs a bitwise OR operation on this APInt and RHS. RHS is + /// logically zero-extended or truncated to match the bit-width of + /// the LHS. + /// + /// @brief Bitwise OR assignment operator. + APInt& operator|=(uint64_t RHS) { + if (isSingleWord()) { + VAL |= RHS; + clearUnusedBits(); + } else { + pVal[0] |= RHS; + } + return *this; + } + /// Performs a bitwise XOR operation on this APInt and RHS. The result is /// assigned to *this. /// @returns *this after XORing with RHS. diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 61cb558946e..925777000f5 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -18,6 +18,7 @@ namespace llvm { template class SmallVectorImpl; + class APInt; /// StringRef - Represent a constant reference to a string, i.e. a character /// array and a length, which need not be null terminated. @@ -273,6 +274,19 @@ namespace llvm { // TODO: Provide overloads for int/unsigned that check for overflow. + /// getAsInteger - Parse the current string as an integer of the + /// specified radix, or of an autosensed radix if the radix given + /// is 0. The current value in Result is discarded, and the + /// storage is changed to be wide enough to store the parsed + /// integer. + /// + /// Returns true if the string does not solely consist of a valid + /// non-empty number in the appropriate base. + /// + /// APInt::fromString is superficially similar but assumes the + /// string is well-formed in the given radix. + bool getAsInteger(unsigned Radix, APInt &Result) const; + /// @} /// @name Substring Operations /// @{ diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index ae2640b5b94..2b262dcd3fa 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/APInt.h" using namespace llvm; @@ -172,23 +173,28 @@ size_t StringRef::count(StringRef Str) const { return Count; } +static unsigned GetAutoSenseRadix(StringRef &Str) { + if (Str.startswith("0x")) { + Str = Str.substr(2); + return 16; + } else if (Str.startswith("0b")) { + Str = Str.substr(2); + return 2; + } else if (Str.startswith("0")) { + return 8; + } else { + return 10; + } +} + + /// GetAsUnsignedInteger - Workhorse method that converts a integer character /// sequence of radix up to 36 to an unsigned long long value. static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, unsigned long long &Result) { // Autosense radix if not specified. - if (Radix == 0) { - if (Str.startswith("0x")) { - Str = Str.substr(2); - Radix = 16; - } else if (Str.startswith("0b")) { - Str = Str.substr(2); - Radix = 2; - } else if (Str.startswith("0")) - Radix = 8; - else - Radix = 10; - } + if (Radix == 0) + Radix = GetAutoSenseRadix(Str); // Empty strings (after the radix autosense) are invalid. if (Str.empty()) return true; @@ -272,3 +278,78 @@ bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { Result = Val; return false; } + +bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { + StringRef Str = *this; + + // Autosense radix if not specified. + if (Radix == 0) + Radix = GetAutoSenseRadix(Str); + + assert(Radix > 1 && Radix <= 36); + + // Empty strings (after the radix autosense) are invalid. + if (Str.empty()) return true; + + // Skip leading zeroes. This can be a significant improvement if + // it means we don't need > 64 bits. + while (!Str.empty() && Str.front() == '0') + Str = Str.substr(1); + + // If it was nothing but zeroes.... + if (Str.empty()) { + Result = APInt(64, 0); + return false; + } + + // (Over-)estimate the required number of bits. + unsigned Log2Radix = 0; + while ((1U << Log2Radix) < Radix) Log2Radix++; + bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix); + + unsigned BitWidth = Log2Radix * Str.size(); + if (BitWidth < Result.getBitWidth()) + BitWidth = Result.getBitWidth(); // don't shrink the result + else + Result.zext(BitWidth); + + APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix + if (!IsPowerOf2Radix) { + // These must have the same bit-width as Result. + RadixAP = APInt(BitWidth, Radix); + CharAP = APInt(BitWidth, 0); + } + + // Parse all the bytes of the string given this radix. + Result = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= '0' && Str[0] <= '9') + CharVal = Str[0]-'0'; + else if (Str[0] >= 'a' && Str[0] <= 'z') + CharVal = Str[0]-'a'+10; + else if (Str[0] >= 'A' && Str[0] <= 'Z') + CharVal = Str[0]-'A'+10; + else + return true; + + // If the parsed value is larger than the integer radix, the string is + // invalid. + if (CharVal >= Radix) + return true; + + // Add in this character. + if (IsPowerOf2Radix) { + Result <<= Log2Radix; + Result |= CharVal; + } else { + Result *= RadixAP; + CharAP = CharVal; + Result += CharAP; + } + + Str = Str.substr(1); + } + + return false; +}