Dogcows Code - chaz/yoink/blob - src/stlplus/strings/string_inf.cpp

   1 ////////////////////////////////////////////////////////////////////////////////
   2
   3 //   Author:    Andy Rushton
   4 //   Copyright: (c) Southampton University 1999-2004
   5 //              (c) Andy Rushton           2004-2009
   6 //   License:   BSD License, see ../docs/license.html
   7
   8 //   String conversion functions for the infinite precision integer type inf
   9
  10 ////////////////////////////////////////////////////////////////////////////////
  11
  12 // can be excluded from the build to break the dependency on the portability library
  13 #ifndef NO_STLPLUS_INF
  14
  15 #include "string_inf.hpp"
  16 #include "string_basic.hpp"
  17 #include <ctype.h>
  18
  19 ////////////////////////////////////////////////////////////////////////////////
  20
  21 namespace stlplus
  22 {
  23
  24   ////////////////////////////////////////////////////////////////////////////////
  25
  // Digit lookup tables shared by the conversions in this file.
  //
  // to_char   - maps a digit value 0..35 to its character ('0'-'9' then 'a'-'z')
  // from_char - maps a character code 0..255 to its digit value 0..35, or to -1
  //             if the character is not a digit in any supported radix;
  //             upper and lower case letters map to the same value
  //
  // from_char is laid out by ASCII code, 16 entries per row, and must be indexed
  // with the character converted to unsigned char.
  // Both tables are only ever read, so they are const.
  static const char to_char [] = "0123456789abcdefghijklmnopqrstuvwxyz";
  static const int from_char [] =
  {
    // 0x00 - 0x2f: control characters and punctuation
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x30 - 0x3f: '0' to '9'
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x5f: 'A' to 'Z'
    -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,
    // 0x60 - 0x7f: 'a' to 'z'
    -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,
    // 0x80 - 0xff: never digits
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
  };
  46
  47   ////////////////////////////////////////////////////////////////////////////////
  48
  49   std::string inf_to_string(const stlplus::inf& data, unsigned radix, radix_display_t display, unsigned width)
  50     throw(std::invalid_argument)
  51   {
  52     std::string result;
  53     if (radix < 2 || radix > 36)
  54       throw std::invalid_argument("invalid radix value");
  55     inf local_i = data;
  56     // untangle all the options
  57     bool hashed = false;
  58     bool binary = false;
  59     bool octal = false;
  60     bool hex = false;
  61     switch(display)
  62     {
  63     case radix_none:
  64       break;
  65     case radix_hash_style:
  66       hashed = radix != 10;
  67       break;
  68     case radix_hash_style_all:
  69       hashed = true;
  70       break;
  71     case radix_c_style:
  72       if (radix == 16)
  73         hex = true;
  74       else if (radix == 8)
  75         octal = true;
  76       else if (radix == 2)
  77         binary = true;
  78       break;
  79     case radix_c_style_or_hash:
  80       if (radix == 16)
  81         hex = true;
  82       else if (radix == 8)
  83         octal = true;
  84       else if (radix == 2)
  85         binary = true;
  86       else if (radix != 10)
  87         hashed = true;
  88       break;
  89     default:
  90       throw std::invalid_argument("invalid radix display value");
  91     }
  92     // create constants of the same type as the template parameter to avoid type mismatches
  93     const inf t_zero(0);
  94     const inf t_radix(radix);
  95     // the C representations for binary, octal and hex use 2's-complement representation
  96     // all other represenations use sign-magnitude
  97     if (hex || octal || binary)
  98     {
  99       // bit-pattern representation
 100       // this is the binary representation optionally shown in octal or hex
 101       // first generate the binary by masking the bits
 102       for (unsigned j = local_i.bits(); j--; )
 103         result += (local_i.bit(j) ? '1' : '0');
 104       // the result is now the full width of the type - e.g. int will give a 32-bit result
 105       // now interpret this as either binary, octal or hex and add the prefix
 106       if (binary)
 107       {
 108         // the result is already binary - but the width may be wrong
 109         // if this is still smaller than the width field, sign extend
 110         // otherwise trim down to either the width or the smallest string that preserves the value
 111         while (result.size() < width)
 112           result.insert((std::string::size_type)0, 1, result[0]);
 113         while (result.size() > width)
 114         {
 115           // do not trim to less than 1 bit (sign only)
 116           if (result.size() <= 1) break;
 117           // only trim if it doesn't change the sign and therefore the value
 118           if (result[0] != result[1]) break;
 119           result.erase(0,1);
 120         }
 121         // add the prefix
 122         result.insert((std::string::size_type)0, "0b");
 123       }
 124       else if (octal)
 125       {
 126         // the result is currently binary - but before converting get the width right
 127         // the width is expressed in octal digits so make the binary 3 times this
 128         // if this is still smaller than the width field, sign extend
 129         // otherwise trim down to either the width or the smallest string that preserves the value
 130         // also ensure that the binary is a multiple of 3 bits to make the conversion to octal easier
 131         while (result.size() < 3*width)
 132           result.insert((std::string::size_type)0, 1, result[0]);
 133         while (result.size() > 3*width)
 134         {
 135           // do not trim to less than 2 bits (sign plus 1-bit magnitude)
 136           if (result.size() <= 2) break;
 137           // only trim if it doesn't change the sign and therefore the value
 138           if (result[0] != result[1]) break;
 139           result.erase(0,1);
 140         }
 141         while (result.size() % 3 != 0)
 142           result.insert((std::string::size_type)0, 1, result[0]);
 143         // now convert to octal
 144         std::string octal_result;
 145         for (unsigned i = 0; i < result.size()/3; i++)
 146         {
 147           // yuck - ugly or what?
 148           if (result[i*3] == '0')
 149           {
 150             if (result[i*3+1] == '0')
 151             {
 152               if (result[i*3+2] == '0')
 153                 octal_result += '0';
 154               else
 155                 octal_result += '1';
 156             }
 157             else
 158             {
 159               if (result[i*3+2] == '0')
 160                 octal_result += '2';
 161               else
 162                 octal_result += '3';
 163             }
 164           }
 165           else
 166           {
 167             if (result[i*3+1] == '0')
 168             {
 169               if (result[i*3+2] == '0')
 170                 octal_result += '4';
 171               else
 172                 octal_result += '5';
 173             }
 174             else
 175             {
 176               if (result[i*3+2] == '0')
 177                 octal_result += '6';
 178               else
 179                 octal_result += '7';
 180             }
 181           }
 182         }
 183         result = octal_result;
 184         // add the prefix
 185         result.insert((std::string::size_type)0, "0");
 186       }
 187       else
 188       {
 189         // similar to octal
 190         while (result.size() < 4*width)
 191           result.insert((std::string::size_type)0, 1, result[0]);
 192         while (result.size() > 4*width)
 193         {
 194           // do not trim to less than 2 bits (sign plus 1-bit magnitude)
 195           if (result.size() <= 2) break;
 196           // only trim if it doesn't change the sign and therefore the value
 197           if (result[0] != result[1]) break;
 198           result.erase(0,1);
 199         }
 200         while (result.size() % 4 != 0)
 201           result.insert((std::string::size_type)0, 1, result[0]);
 202         // now convert to hex
 203         std::string hex_result;
 204         for (unsigned i = 0; i < result.size()/4; i++)
 205         {
 206           // yuck - ugly or what?
 207           if (result[i*4] == '0')
 208           {
 209             if (result[i*4+1] == '0')
 210             {
 211               if (result[i*4+2] == '0')
 212               {
 213                 if (result[i*4+3] == '0')
 214                   hex_result += '0';
 215                 else
 216                   hex_result += '1';
 217               }
 218               else
 219               {
 220                 if (result[i*4+3] == '0')
 221                   hex_result += '2';
 222                 else
 223                   hex_result += '3';
 224               }
 225             }
 226             else
 227             {
 228               if (result[i*4+2] == '0')
 229               {
 230                 if (result[i*4+3] == '0')
 231                   hex_result += '4';
 232                 else
 233                   hex_result += '5';
 234               }
 235               else
 236               {
 237                 if (result[i*4+3] == '0')
 238                   hex_result += '6';
 239                 else
 240                   hex_result += '7';
 241               }
 242             }
 243           }
 244           else
 245           {
 246             if (result[i*4+1] == '0')
 247             {
 248               if (result[i*4+2] == '0')
 249               {
 250                 if (result[i*4+3] == '0')
 251                   hex_result += '8';
 252                 else
 253                   hex_result += '9';
 254               }
 255               else
 256               {
 257                 if (result[i*4+3] == '0')
 258                   hex_result += 'a';
 259                 else
 260                   hex_result += 'b';
 261               }
 262             }
 263             else
 264             {
 265               if (result[i*4+2] == '0')
 266               {
 267                 if (result[i*4+3] == '0')
 268                   hex_result += 'c';
 269                 else
 270                   hex_result += 'd';
 271               }
 272               else
 273               {
 274                 if (result[i*4+3] == '0')
 275                   hex_result += 'e';
 276                 else
 277                   hex_result += 'f';
 278               }
 279             }
 280           }
 281         }
 282         result = hex_result;
 283         // add the prefix
 284         result.insert((std::string::size_type)0, "0x");
 285       }
 286     }
 287     else
 288     {
 289       // convert to sign-magnitude
 290       // the representation is:
 291       // [radix#][sign]magnitude
 292       bool negative = local_i.negative();
 293       local_i.abs();
 294       // create a representation of the magnitude by successive division
 295       do
 296       {
 297         std::pair<inf,inf> divided = local_i.divide(t_radix);
 298         unsigned remainder = divided.second.to_unsigned();
 299         char digit = to_char[remainder];
 300         result.insert((std::string::size_type)0, 1, digit);
 301         local_i = divided.first;
 302       }
 303       while(!local_i.zero() || result.size() < width);
 304       // add the prefixes
 305       // add a sign only for negative values
 306       if (negative)
 307         result.insert((std::string::size_type)0, 1, '-');
 308       // then prefix everything with the radix if the hashed representation was requested
 309       if (hashed)
 310         result.insert((std::string::size_type)0, unsigned_to_string(radix) + "#");
 311     }
 312     return result;
 313   }
 314
 315   ////////////////////////////////////////////////////////////////////////////////
 316   // Conversions FROM string
 317
 318   inf string_to_inf(const std::string& str, unsigned radix) throw(std::invalid_argument)
 319   {
 320     inf result;
 321     if (radix != 0 && (radix < 2 || radix > 36))
 322       throw std::invalid_argument("invalid radix value " + unsigned_to_string(radix));
 323     unsigned i = 0;
 324     // the radix passed as a parameter is just the default - it can be
 325     // overridden by either the C prefix or the hash prefix
 326     // Note: a leading zero is the C-style prefix for octal - I only make this
 327     // override the default when the default radix is not specified
 328     // first check for a C-style prefix
 329     bool c_style = false;
 330     if (i < str.size() && str[i] == '0')
 331     {
 332       // binary or hex
 333       if (i+1 < str.size() && tolower(str[i+1]) == 'x')
 334       {
 335         c_style = true;
 336         radix = 16;
 337         i += 2;
 338       }
 339       else if (i+1 < str.size() && tolower(str[i+1]) == 'b')
 340       {
 341         c_style = true;
 342         radix = 2;
 343         i += 2;
 344       }
 345       else if (radix == 0)
 346       {
 347         c_style = true;
 348         radix = 8;
 349         i += 1;
 350       }
 351     }
 352     // now check for a hash-style prefix if a C-style prefix was not found
 353     if (i == 0)
 354     {
 355       // scan for the sequence {digits}#
 356       bool hash_found = false;
 357       unsigned j = i;
 358       for (; j < str.size(); j++)
 359       {
 360         if (!isdigit(str[j]))
 361         {
 362           if (str[j] == '#')
 363             hash_found = true;
 364           break;
 365         }
 366       }
 367       if (hash_found)
 368       {
 369         // use the hash prefix to define the radix
 370         // i points to the start of the radix and j points to the # character
 371         std::string slice = str.substr(i, j-i);
 372         radix = string_to_unsigned(slice);
 373         i = j+1;
 374       }
 375     }
 376     if (radix == 0)
 377       radix = 10;
 378     if (radix < 2 || radix > 36)
 379       throw std::invalid_argument("invalid radix value");
 380     if (c_style)
 381     {
 382       // the C style formats are bit patterns not integer values - these need
 383       // to be sign-extended to get the right value
 384       std::string binary;
 385       if (radix == 2)
 386       {
 387         for (unsigned j = i; j < str.size(); j++)
 388         {
 389           switch(str[j])
 390           {
 391           case '0':
 392             binary += '0';
 393             break;
 394           case '1':
 395             binary += '1';
 396             break;
 397           default:
 398             throw std::invalid_argument("invalid binary character in string " + str);
 399           }
 400         }
 401       }
 402       else if (radix == 8)
 403       {
 404         for (unsigned j = i; j < str.size(); j++)
 405         {
 406           switch(str[j])
 407           {
 408           case '0':
 409             binary += "000";
 410             break;
 411           case '1':
 412             binary += "001";
 413             break;
 414           case '2':
 415             binary += "010";
 416             break;
 417           case '3':
 418             binary += "011";
 419             break;
 420           case '4':
 421             binary += "100";
 422             break;
 423           case '5':
 424             binary += "101";
 425             break;
 426           case '6':
 427             binary += "110";
 428             break;
 429           case '7':
 430             binary += "111";
 431             break;
 432           default:
 433             throw std::invalid_argument("invalid octal character in string " + str);
 434           }
 435         }
 436       }
 437       else
 438       {
 439         for (unsigned j = i; j < str.size(); j++)
 440         {
 441           switch(tolower(str[j]))
 442           {
 443           case '0':
 444             binary += "0000";
 445             break;
 446           case '1':
 447             binary += "0001";
 448             break;
 449           case '2':
 450             binary += "0010";
 451             break;
 452           case '3':
 453             binary += "0011";
 454             break;
 455           case '4':
 456             binary += "0100";
 457             break;
 458           case '5':
 459             binary += "0101";
 460             break;
 461           case '6':
 462             binary += "0110";
 463             break;
 464           case '7':
 465             binary += "0111";
 466             break;
 467           case '8':
 468             binary += "1000";
 469             break;
 470           case '9':
 471             binary += "1001";
 472             break;
 473           case 'a':
 474             binary += "1010";
 475             break;
 476           case 'b':
 477             binary += "1011";
 478             break;
 479           case 'c':
 480             binary += "1100";
 481             break;
 482           case 'd':
 483             binary += "1101";
 484             break;
 485           case 'e':
 486             binary += "1110";
 487             break;
 488           case 'f':
 489             binary += "1111";
 490             break;
 491           default:
 492             throw std::invalid_argument("invalid hex character in string " + str);
 493           }
 494         }
 495       }
 496       // now convert the value
 497       result.resize(binary.size());
 498       for (unsigned j = 0; j < binary.size(); j++)
 499         result.preset(binary.size() - j - 1, binary[j] == '1');
 500     }
 501     else
 502     {
 503       // now scan for a sign and find whether this is a negative number
 504       bool negative = false;
 505       if (i < str.size())
 506       {
 507         switch (str[i])
 508         {
 509         case '-':
 510           negative = true;
 511           i++;
 512           break;
 513         case '+':
 514           i++;
 515           break;
 516         }
 517       }
 518       for (; i < str.size(); i++)
 519       {
 520         result *= inf(radix);
 521         int ch = from_char[(unsigned char)str[i]] ;
 522         if (ch == -1)
 523           throw std::invalid_argument("invalid character in string " + str + " for radix " + unsigned_to_string(radix));
 524         result += inf(ch);
 525       }
 526       if (negative)
 527         result.negate();
 528     }
 529     return result;
 530   }
 531
 532 ////////////////////////////////////////////////////////////////////////////////
 533
 534 } // end namespace stlplus
 535
 536 #endif