Dogcows Code - chaz/openbox/blob - otk/ustring.cc
ustring seems to be working! yay!
[chaz/openbox] / otk / ustring.cc
1 // -*- mode: C++; indent-tabs-mode: nil; c-basic-offset: 2; -*-
2
3 #ifdef HAVE_CONFIG_H
4 # include "../config.h"
5 #endif // HAVE_CONFIG_H
6
7 #include "ustring.hh"
8
9 extern "C" {
10 #include <assert.h>
11 }
12
13 namespace otk {
14
15 // helper functions
16
17 // takes a pointer into a utf8 string and returns a unicode character for the
18 // first character at the pointer
19 unichar utf8_get_char (const char *p)
20 {
21 unichar result = static_cast<unsigned char>(*p);
22
23 // if its not a 7-bit ascii character
24 if((result & 0x80) != 0) {
25 // len is the number of bytes this character takes up in the string
26 unsigned char len = utf8_skip[result];
27 result &= 0x7F >> len;
28
29 while(--len != 0) {
30 result <<= 6;
31 result |= static_cast<unsigned char>(*++p) & 0x3F;
32 }
33 }
34
35 return result;
36 }
37
38 // takes a pointer into a string and finds its offset
39 static ustring::size_type utf8_ptr_to_offset(const char *str, const char *pos)
40 {
41 ustring::size_type offset = 0;
42
43 while (str < pos) {
44 str += utf8_skip[*str];
45 offset++;
46 }
47
48 return offset;
49 }
50
51 // takes an offset into a string and returns a pointer to it
52 const char *utf8_offset_to_ptr(const char *str, ustring::size_type offset)
53 {
54 while (offset--)
55 str += utf8_skip[*str];
56 return str;
57 }
58
59 // First overload: stop on '\0' character.
60 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset)
61 {
62 if(offset == ustring::npos)
63 return ustring::npos;
64
65 const char* p = str;
66
67 for(; offset != 0; --offset)
68 {
69 if(*p == '\0')
70 return ustring::npos;
71
72 p += utf8_skip[*p];
73 }
74
75 return (p - str);
76 }
77
78 // Second overload: stop when reaching maxlen.
79 ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset,
80 ustring::size_type maxlen)
81 {
82 if(offset == ustring::npos)
83 return ustring::npos;
84
85 const char *const pend = str + maxlen;
86 const char* p = str;
87
88 for(; offset != 0; --offset)
89 {
90 if(p >= pend)
91 return ustring::npos;
92
93 p += utf8_skip[*p];
94 }
95
96 return (p - str);
97 }
98
99
100 // ustring methods
101
102 ustring::ustring()
103 {
104 }
105
106 ustring::~ustring()
107 {
108 }
109
110 ustring::ustring(const ustring& other)
111 : _string(other._string), _utf8(other._utf8)
112 {
113 }
114
115 ustring& ustring::operator=(const ustring& other)
116 {
117 _string = other._string;
118 _utf8 = other._utf8;
119 return *this;
120 }
121
122 ustring::ustring(const std::string& src)
123 : _string(src), _utf8(true)
124 {
125 }
126
127 ustring::ustring(const char* src)
128 : _string(src), _utf8(true)
129 {
130 }
131
132 ustring& ustring::operator+=(const ustring& src)
133 {
134 assert(_utf8 == src._utf8);
135 _string += src._string;
136 return *this;
137 }
138
139 ustring& ustring::operator+=(const char* src)
140 {
141 _string += src;
142 return *this;
143 }
144
145 ustring& ustring::operator+=(char c)
146 {
147 _string += c;
148 return *this;
149 }
150
151 ustring::size_type ustring::size() const
152 {
153 if (_utf8) {
154 const char *const pdata = _string.data();
155 return utf8_ptr_to_offset(pdata, pdata + _string.size());
156 } else
157 return _string.size();
158 }
159
160 ustring::size_type ustring::bytes() const
161 {
162 return _string.size();
163 }
164
165 ustring::size_type ustring::capacity() const
166 {
167 return _string.capacity();
168 }
169
170 ustring::size_type ustring::max_size() const
171 {
172 return _string.max_size();
173 }
174
175 bool ustring::empty() const
176 {
177 return _string.empty();
178 }
179
180 void ustring::clear()
181 {
182 _string.erase();
183 }
184
185 ustring& ustring::erase(ustring::size_type i, ustring::size_type n)
186 {
187 if (_utf8) {
188 // find a proper offset
189 size_type utf_i = utf8_byte_offset(_string.c_str(), i);
190 if (utf_i != npos) {
191 // if the offset is not npos, find a proper length for 'n'
192 size_type utf_n = utf8_byte_offset(_string.data() + utf_i, n,
193 _string.size() - utf_i);
194 _string.erase(utf_i, utf_n);
195 }
196 } else
197 _string.erase(i, n);
198
199 return *this;
200 }
201
202 void ustring::resize(ustring::size_type n, char c)
203 {
204 if (_utf8) {
205 const size_type size_now = size();
206 if(n < size_now)
207 erase(n, npos);
208 else if(n > size_now)
209 _string.append(n - size_now, c);
210 } else
211 _string.resize(n, c);
212 }
213
214 ustring::value_type ustring::operator[](ustring::size_type i) const
215 {
216 return utf8_get_char(utf8_offset_to_ptr(_string.data(), i));
217 }
218
219 const char* ustring::data() const
220 {
221 return _string.data();
222 }
223
224 const char* ustring::c_str() const
225 {
226 return _string.c_str();
227 }
228
229 bool ustring::utf8() const
230 {
231 return _utf8;
232 }
233
234 void ustring::setUtf8(bool utf8)
235 {
236 _utf8 = utf8;
237 }
238
239 }
This page took 0.042972 seconds and 5 git commands to generate.