Dogcows Code - chaz/openbox/blob - otk/ustring.cc
kill some whitespace
[chaz/openbox] / otk / ustring.cc
1 // -*- mode: C++; indent-tabs-mode: nil; c-basic-offset: 2; -*-
2
3 #include "config.h"
4
5 #include "ustring.hh"
6
7 #include <cassert>
8
9 namespace otk {
10
11 // helper functions
12
// Lookup table indexed by the value of a byte in a UTF-8 string. Each entry
// is the number of bytes to advance from that byte to reach the start of the
// next character.
static const char utf8_skip[256] = {
  // 0x00 - 0x1F
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  // 0x20 - 0x3F
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  // 0x40 - 0x5F
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  // 0x60 - 0x7F
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  // 0x80 - 0x9F
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  // 0xA0 - 0xBF
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  // 0xC0 - 0xDF: two bytes per character
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  // 0xE0 - 0xEF: three bytes per character
  3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,
  // 0xF0 - 0xF7: four, 0xF8 - 0xFB: five, 0xFC - 0xFD: six,
  // 0xFE - 0xFF: one
  4,4,4,4,4,4,4,4, 5,5,5,5, 6,6, 1,1
};
24
25 // takes a pointer into a utf8 string and returns a unicode character for the
26 // first character at the pointer
27 unichar utf8_get_char (const char *p)
28 {
29 unichar result = static_cast<unsigned char>(*p);
30
31 // if its not a 7-bit ascii character
32 if((result & 0x80) != 0) {
33 // len is the number of bytes this character takes up in the string
34 unsigned char len = utf8_skip[result];
35 result &= 0x7F >> len;
36
37 while(--len != 0) {
38 result <<= 6;
39 result |= static_cast<unsigned char>(*++p) & 0x3F;
40 }
41 }
42
43 return result;
44 }
45
46 // takes a pointer into a string and finds its offset
47 static ustring::size_type utf8_ptr_to_offset(const char *str, const char *pos)
48 {
49 ustring::size_type offset = 0;
50
51 while (str < pos) {
52 str += utf8_skip[static_cast<unsigned char>(*str)];
53 offset++;
54 }
55
56 return offset;
57 }
58
59 // takes an offset into a string and returns a pointer to it
60 const char *utf8_offset_to_ptr(const char *str, ustring::size_type offset)
61 {
62 while (offset--)
63 str += utf8_skip[static_cast<unsigned char>(*str)];
64 return str;
65 }
66
67 // First overload: stop on '\0' character.
68 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset)
69 {
70 if(offset == ustring::npos)
71 return ustring::npos;
72
73 const char* p = str;
74
75 for(; offset != 0; --offset)
76 {
77 if(*p == '\0')
78 return ustring::npos;
79
80 p += utf8_skip[static_cast<unsigned char>(*p)];
81 }
82
83 return (p - str);
84 }
85
86 // Second overload: stop when reaching maxlen.
87 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset,
88 ustring::size_type maxlen)
89 {
90 if(offset == ustring::npos)
91 return ustring::npos;
92
93 const char *const pend = str + maxlen;
94 const char* p = str;
95
96 for(; offset != 0; --offset)
97 {
98 if(p >= pend)
99 return ustring::npos;
100
101 p += utf8_skip[static_cast<unsigned char>(*p)];
102 }
103
104 return (p - str);
105 }
106
107
108 // ustring methods
109
110 ustring::ustring(bool utf8)
111 : _utf8(utf8)
112 {
113 }
114
115 ustring::~ustring()
116 {
117 }
118
119 ustring::ustring(const ustring& other)
120 : _string(other._string), _utf8(other._utf8)
121 {
122 }
123
124 ustring& ustring::operator=(const ustring& other)
125 {
126 _string = other._string;
127 _utf8 = other._utf8;
128 return *this;
129 }
130
131 ustring::ustring(const std::string& src, bool utf8)
132 : _string(src), _utf8(utf8)
133 {
134 }
135
136 ustring::ustring(const char* src, bool utf8)
137 : _string(src), _utf8(utf8)
138 {
139 }
140
141 ustring& ustring::operator+=(const ustring& src)
142 {
143 assert(_utf8 == src._utf8);
144 _string += src._string;
145 return *this;
146 }
147
148 ustring& ustring::operator+=(const char* src)
149 {
150 _string += src;
151 return *this;
152 }
153
154 ustring& ustring::operator+=(char c)
155 {
156 _string += c;
157 return *this;
158 }
159
160 ustring::size_type ustring::size() const
161 {
162 if (_utf8) {
163 const char *const pdata = _string.data();
164 return utf8_ptr_to_offset(pdata, pdata + _string.size());
165 } else
166 return _string.size();
167 }
168
169 ustring::size_type ustring::bytes() const
170 {
171 return _string.size();
172 }
173
174 ustring::size_type ustring::capacity() const
175 {
176 return _string.capacity();
177 }
178
179 ustring::size_type ustring::max_size() const
180 {
181 return _string.max_size();
182 }
183
184 bool ustring::empty() const
185 {
186 return _string.empty();
187 }
188
189 void ustring::clear()
190 {
191 _string.erase();
192 }
193
194 ustring& ustring::erase(ustring::size_type i, ustring::size_type n)
195 {
196 if (_utf8) {
197 // find a proper offset
198 size_type utf_i = utf8_byte_offset(_string.c_str(), i);
199 if (utf_i != npos) {
200 // if the offset is not npos, find a proper length for 'n'
201 size_type utf_n = utf8_byte_offset(_string.data() + utf_i, n,
202 _string.size() - utf_i);
203 _string.erase(utf_i, utf_n);
204 }
205 } else
206 _string.erase(i, n);
207
208 return *this;
209 }
210
211 void ustring::resize(ustring::size_type n, char c)
212 {
213 if (_utf8) {
214 const size_type size_now = size();
215 if(n < size_now)
216 erase(n, npos);
217 else if(n > size_now)
218 _string.append(n - size_now, c);
219 } else
220 _string.resize(n, c);
221 }
222
223 ustring::value_type ustring::operator[](ustring::size_type i) const
224 {
225 return utf8_get_char(utf8_offset_to_ptr(_string.data(), i));
226 }
227
228 bool ustring::operator==(const ustring &other) const
229 {
230 return _string == other._string && _utf8 == other._utf8;
231 }
232
233 bool ustring::operator==(const std::string &other) const
234 {
235 return _string == other;
236 }
237
238 bool ustring::operator==(const char *other) const
239 {
240 return _string == other;
241 }
242
243 const char* ustring::data() const
244 {
245 return _string.data();
246 }
247
248 const char* ustring::c_str() const
249 {
250 return _string.c_str();
251 }
252
253 bool ustring::utf8() const
254 {
255 return _utf8;
256 }
257
258 void ustring::setUtf8(bool utf8)
259 {
260 _utf8 = utf8;
261 }
262
263 }
This page took 0.047427 seconds and 4 git commands to generate.