Dogcows Code - chaz/openbox/blob - otk/ustring.cc

   1 // -*- mode: C++; indent-tabs-mode: nil; c-basic-offset: 2; -*-
   2
   3 #include "config.h"
   4
   5 #include "ustring.hh"
   6
   7 #include <cassert>
   8
   9 namespace otk {
  10
  11 // helper functions
  12
  13 // The number of bytes to skip to find the next character in the string
  14 static const char utf8_skip[256] = {
  15   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  16   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  17   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  18   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  19   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  20   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  21   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  22   3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
  23 };
  24
  25 // takes a pointer into a utf8 string and returns a unicode character for the
  26 // first character at the pointer
  27 unichar utf8_get_char (const char *p)
  28 {
  29   unichar result = static_cast<unsigned char>(*p);
  30
  31   // if its not a 7-bit ascii character
  32   if((result & 0x80) != 0) {
  33     // len is the number of bytes this character takes up in the string
  34     unsigned char len = utf8_skip[result];
  35     result &= 0x7F >> len;
  36
  37     while(--len != 0) {
  38       result <<= 6;
  39       result |= static_cast<unsigned char>(*++p) & 0x3F;
  40     }
  41   }
  42
  43   return result;
  44 }
  45
  46 // takes a pointer into a string and finds its offset
  47 static ustring::size_type utf8_ptr_to_offset(const char *str, const char *pos)
  48 {
  49   ustring::size_type offset = 0;
  50
  51   while (str < pos) {
  52     str += utf8_skip[static_cast<unsigned char>(*str)];
  53     offset++;
  54   }
  55
  56   return offset;
  57 }
  58
  59 // takes an offset into a string and returns a pointer to it
  60 const char *utf8_offset_to_ptr(const char *str, ustring::size_type offset)
  61 {
  62   while (offset--)
  63     str += utf8_skip[static_cast<unsigned char>(*str)];
  64   return str;
  65 }
  66
  67 // First overload: stop on '\0' character.
  68 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset)
  69 {
  70   if(offset == ustring::npos)
  71     return ustring::npos;
  72
  73   const char* p = str;
  74
  75   for(; offset != 0; --offset)
  76   {
  77     if(*p == '\0')
  78       return ustring::npos;
  79
  80     p += utf8_skip[static_cast<unsigned char>(*p)];
  81   }
  82
  83   return (p - str);
  84 }
  85
  86 // Second overload: stop when reaching maxlen.
  87 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset,
  88                                     ustring::size_type maxlen)
  89 {
  90   if(offset == ustring::npos)
  91     return ustring::npos;
  92
  93   const char *const pend = str + maxlen;
  94   const char* p = str;
  95
  96   for(; offset != 0; --offset)
  97   {
  98     if(p >= pend)
  99       return ustring::npos;
 100
 101     p += utf8_skip[static_cast<unsigned char>(*p)];
 102   }
 103
 104   return (p - str);
 105 }
 106
 107
 108 // ustring methods
 109
 110 ustring::ustring(bool utf8)
 111   : _utf8(utf8)
 112 {
 113 }
 114
 115 ustring::~ustring()
 116 {
 117 }
 118
 119 ustring::ustring(const ustring& other)
 120   : _string(other._string), _utf8(other._utf8)
 121 {
 122 }
 123
 124 ustring& ustring::operator=(const ustring& other)
 125 {
 126   _string = other._string;
 127   _utf8 = other._utf8;
 128   return *this;
 129 }
 130
 131 ustring::ustring(const std::string& src, bool utf8)
 132   : _string(src), _utf8(utf8)
 133 {
 134 }
 135
 136 ustring::ustring(const char* src, bool utf8)
 137   : _string(src), _utf8(utf8)
 138 {
 139 }
 140
 141 ustring& ustring::operator+=(const ustring& src)
 142 {
 143   assert(_utf8 == src._utf8);
 144   _string += src._string;
 145   return *this;
 146 }
 147
 148 ustring& ustring::operator+=(const char* src)
 149 {
 150   _string += src;
 151   return *this;
 152 }
 153
 154 ustring& ustring::operator+=(char c)
 155 {
 156   _string += c;
 157   return *this;
 158 }
 159
 160 ustring::size_type ustring::size() const
 161 {
 162   if (_utf8) {
 163     const char *const pdata = _string.data();
 164     return utf8_ptr_to_offset(pdata, pdata + _string.size());
 165   } else
 166     return _string.size();
 167 }
 168
 169 ustring::size_type ustring::bytes() const
 170 {
 171   return _string.size();
 172 }
 173
 174 ustring::size_type ustring::capacity() const
 175 {
 176   return _string.capacity();
 177 }
 178
 179 ustring::size_type ustring::max_size() const
 180 {
 181   return _string.max_size();
 182 }
 183
 184 bool ustring::empty() const
 185 {
 186   return _string.empty();
 187 }
 188
 189 void ustring::clear()
 190 {
 191   _string.erase();
 192 }
 193
 194 ustring& ustring::erase(ustring::size_type i, ustring::size_type n)
 195 {
 196   if (_utf8) {
 197     // find a proper offset
 198     size_type utf_i = utf8_byte_offset(_string.c_str(), i);
 199     if (utf_i != npos) {
 200       // if the offset is not npos, find a proper length for 'n'
 201       size_type utf_n = utf8_byte_offset(_string.data() + utf_i, n,
 202                                          _string.size() - utf_i);
 203       _string.erase(utf_i, utf_n);
 204     }
 205   } else
 206     _string.erase(i, n);
 207
 208   return *this;
 209 }
 210
 211 void ustring::resize(ustring::size_type n, char c)
 212 {
 213   if (_utf8) {
 214     const size_type size_now = size();
 215     if(n < size_now)
 216       erase(n, npos);
 217     else if(n > size_now)
 218       _string.append(n - size_now, c);
 219   } else
 220     _string.resize(n, c);
 221 }
 222
 223 ustring::value_type ustring::operator[](ustring::size_type i) const
 224 {
 225   return utf8_get_char(utf8_offset_to_ptr(_string.data(), i));
 226 }
 227
 228 bool ustring::operator==(const ustring &other) const
 229 {
 230   return _string == other._string && _utf8 == other._utf8;
 231 }
 232
 233 bool ustring::operator==(const std::string &other) const
 234 {
 235   return _string == other;
 236 }
 237
 238 bool ustring::operator==(const char *other) const
 239 {
 240   return _string == other;
 241 }
 242
 243 const char* ustring::data() const
 244 {
 245   return _string.data();
 246 }
 247
 248 const char* ustring::c_str() const
 249 {
 250   return _string.c_str();
 251 }
 252
 253 bool ustring::utf8() const
 254 {
 255   return _utf8;
 256 }
 257
 258 void ustring::setUtf8(bool utf8)
 259 {
 260   _utf8 = utf8;
 261 }
 262
 263 }