]> Dogcows Code - chaz/yoink/blob - src/stlplus/strings/string_utilities.cpp
build system enhancements
[chaz/yoink] / src / stlplus / strings / string_utilities.cpp
1 ////////////////////////////////////////////////////////////////////////////////
2
3 // Author: Andy Rushton
4 // Copyright: (c) Southampton University 1999-2004
5 // (c) Andy Rushton 2004-2009
6 // License: BSD License, see ../docs/license.html
7
8 ////////////////////////////////////////////////////////////////////////////////
9 #include "string_utilities.hpp"
10 #include "string_basic.hpp"
11 #include <stdlib.h>
12 #include <ctype.h>
13 #include <stdarg.h>
14 #include <stdio.h>
15
16 namespace stlplus
17 {
18
19 // added as a local copy to break the dependency on the portability library
// printf-style formatting into a std::string
// format: a printf format string; the variadic arguments must agree with it
// returns the formatted text
// throws std::invalid_argument if the underlying C formatting call fails or
// (Windows branch) the working buffer cannot be allocated
// Note: no dynamic exception specification is used on this file-local function
// because such specifications were removed from the language in C++17
static std::string local_dformat(const char* format, ...)
{
  std::string formatted;
#ifdef MSWINDOWS
  // _vsnprintf gives a negative result when the buffer is too small,
  // so keep doubling the buffer until the text fits
  for (int buffer_length = 256; ; buffer_length *= 2)
  {
    char* buffer = static_cast<char*>(malloc(buffer_length));
    if (!buffer) throw std::invalid_argument("string_utilities");
    // a va_list can only be walked once, so it is restarted for every attempt
    // and always closed again before anything can throw
    va_list args;
    va_start(args, format);
    const int length = _vsnprintf(buffer, buffer_length-1, format, args);
    va_end(args);
    if (length >= 0)
    {
      // use the reported length so that embedded null characters survive
      formatted.assign(buffer, static_cast<std::string::size_type>(length));
      free(buffer);
      break;
    }
    free(buffer);
  }
#else
  char* buffer = 0;
  va_list args;
  va_start(args, format);
  const int length = vasprintf(&buffer, format, args);
  va_end(args);
  // on failure the buffer pointer must not be inspected or freed - only the
  // return value says whether an allocation was made
  if (length < 0) throw std::invalid_argument("string_utilities");
  formatted.assign(buffer, static_cast<std::string::size_type>(length));
  free(buffer);
#endif
  return formatted;
}
54
55 ////////////////////////////////////////////////////////////////////////////////
56
57 std::string pad(const std::string& str, alignment_t alignment, unsigned width, char padch)
58 throw(std::invalid_argument)
59 {
60 std::string result = str;
61 switch(alignment)
62 {
63 case align_left:
64 {
65 unsigned padding = width>str.size() ? width - str.size() : 0;
66 unsigned i = 0;
67 while (i++ < padding)
68 result.insert(result.end(), padch);
69 break;
70 }
71 case align_right:
72 {
73 unsigned padding = width>str.size() ? width - str.size() : 0;
74 unsigned i = 0;
75 while (i++ < padding)
76 result.insert(result.begin(), padch);
77 break;
78 }
79 case align_centre:
80 {
81 unsigned padding = width>str.size() ? width - str.size() : 0;
82 unsigned i = 0;
83 while (i++ < padding/2)
84 result.insert(result.end(), padch);
85 i--;
86 while (i++ < padding)
87 result.insert(result.begin(), padch);
88 break;
89 }
90 default:
91 throw std::invalid_argument("invalid alignment value");
92 }
93 return result;
94 }
95
96 ////////////////////////////////////////////////////////////////////////////////
97
// Remove leading whitespace (as classified by isspace) from a string
// val: the text to trim
// returns a copy of val starting at its first non-whitespace character;
// the result is empty if val is empty or entirely whitespace
std::string trim_left(const std::string& val)
{
  std::string::size_type first = 0;
  // isspace requires a value representable as unsigned char, so convert first
  // to avoid undefined behaviour for characters with the top bit set
  while (first < val.size() && isspace(static_cast<unsigned char>(val[first])))
    ++first;
  return val.substr(first);
}
105
// Remove trailing whitespace (as classified by isspace) from a string
// val: the text to trim
// returns a copy of val ending at its last non-whitespace character;
// the result is empty if val is empty or entirely whitespace
std::string trim_right(const std::string& val)
{
  std::string::size_type length = val.size();
  // isspace requires a value representable as unsigned char, so convert first
  // to avoid undefined behaviour for characters with the top bit set
  while (length > 0 && isspace(static_cast<unsigned char>(val[length-1])))
    --length;
  return val.substr(0, length);
}
113
// Remove both leading and trailing whitespace (as classified by isspace) from a string
// val: the text to trim
// returns the part of val between its first and last non-whitespace characters inclusive;
// the result is empty if val is empty or entirely whitespace
std::string trim(const std::string& val)
{
  std::string::size_type first = 0;
  std::string::size_type last = val.size();
  // isspace requires a value representable as unsigned char, so convert first
  // to avoid undefined behaviour for characters with the top bit set
  while (first < last && isspace(static_cast<unsigned char>(val[first])))
    ++first;
  while (last > first && isspace(static_cast<unsigned char>(val[last-1])))
    --last;
  return val.substr(first, last - first);
}
123
124 ////////////////////////////////////////////////////////////////////////////////
125
// Convert every character of a string to lower case using tolower
// val: the text to convert
// returns a converted copy; characters tolower does not change are copied as they are
std::string lowercase(const std::string& val)
{
  std::string text = val;
  for (std::string::size_type i = 0; i < text.size(); i++)
  {
    // tolower requires a value representable as unsigned char, so convert first
    // to avoid undefined behaviour for characters with the top bit set
    text[i] = static_cast<char>(tolower(static_cast<unsigned char>(text[i])));
  }
  return text;
}
133
// Convert every character of a string to upper case using toupper
// val: the text to convert
// returns a converted copy; characters toupper does not change are copied as they are
std::string uppercase(const std::string& val)
{
  std::string text = val;
  for (std::string::size_type i = 0; i < text.size(); i++)
  {
    // toupper requires a value representable as unsigned char, so convert first
    // to avoid undefined behaviour for characters with the top bit set
    text[i] = static_cast<char>(toupper(static_cast<unsigned char>(text[i])));
  }
  return text;
}
141
142 ////////////////////////////////////////////////////////////////////////////////
143
// Character-by-character translation of a string
// input: the text to translate
// from_set: the characters to be replaced
// to_set: the replacements - the character at position n of from_set becomes
//   the character at position n of to_set
// A character not present in from_set is copied unchanged. A character whose
// position in from_set has no counterpart in to_set (to_set is shorter) is
// removed from the result.
std::string translate(const std::string& input, const std::string& from_set, const std::string& to_set)
{
  std::string output;
  for (std::string::const_iterator current = input.begin(); current != input.end(); ++current)
  {
    const std::string::size_type position = from_set.find(*current);
    if (position == std::string::npos)
      output.push_back(*current);
    else if (position < to_set.size())
      output.push_back(to_set[position]);
    // otherwise there is no replacement character, so nothing is emitted
  }
  return output;
}
165
166 ////////////////////////////////////////////////////////////////////////////////
// WARNING: wheel re-invention follows
// Given that all shells perform wildcard matching, why don't the library writers put it in the C run-time????????
// The problem:
//   *  matches any number of characters, including none - this is achieved by trying the rest of the
//      wildcard against every possible remainder of the string: first with nothing consumed, then 1
//      character consumed, then 2 etc.
//      this must be recursive, not iterative, so that multiple *s can appear in the same wildcard expression
//   ?  matches exactly one character so doesn't need the what-if approach
//   \  escapes special characters such as *, ? and [
//   [] matches exactly one character in the set - the difficulty is the set can contain ranges, e.g [a-zA-Z0-9]
//      NOTE: the scan for the closing ] stops at the first unescaped ], so to include ] in a set
//      it has to be escaped as \]

// function for testing whether a character matches a set
// set: the contents of a [] expression without the brackets
// match: the character to test
// returns true if match is a member of the set
// a set contains characters, escaped characters and ranges in the form a-z
// The character '-' at the start of the set is a literal '-'
// A set ending in an unescaped '-' or in a lone '\' is malformed and never matches

static bool match_set (const std::string& set, char match)
{
  // first expand any ranges and remove escape characters to make life more palatable
  std::string simple_set;
  for (std::string::const_iterator i = set.begin(); i != set.end(); ++i)
  {
    switch(*i)
    {
    case '-':
    {
      if (i == set.begin())
      {
        simple_set += *i;
      }
      else if (i+1 == set.end())
      {
        return false;
      }
      else
      {
        // found a range. Its first character has normally just been added to the result, so remove
        // it again because the range might be empty (first > last). The check guards the case where
        // the previous element was itself an empty range: then there is nothing of ours to remove
        // and the result may even be empty
        const char first = *(i-1);
        const char last = *++i;
        if (!simple_set.empty() && *(simple_set.end()-1) == first)
          simple_set.erase(simple_set.end()-1);
        // count in int so that the loop terminates even when last is the largest char value
        for (int ch = first; ch <= last; ++ch)
        {
          simple_set += static_cast<char>(ch);
        }
      }
      break;
    }
    case '\\':
      if (i+1 == set.end()) {return false;}
      simple_set += *++i;
      break;
    default:
      simple_set += *i;
      break;
    }
  }
  return simple_set.find(match) != std::string::npos;
}

// the recursive bit - basically whenever a * is found you recursively call this for each candidate substring match
// until either it succeeds or you run out of string to match
// for each * in the wildcard another level of recursion is created
// wild, wildi: the wildcard expression and the current position within it
// match, matchi: the string being tested and the current position within it
// returns true if the rest of the wildcard matches the whole of the rest of the string

static bool match_remainder (const std::string& wild, std::string::const_iterator wildi,
                             const std::string& match, std::string::const_iterator matchi)
{
  while (wildi != wild.end() && matchi != match.end())
  {
    switch(*wildi)
    {
    case '*':
    {
      ++wildi;
      // a * at the end of the wildcard swallows whatever is left of the string
      if (wildi == wild.end())
        return true;
      // otherwise try the rest of the wildcard against every suffix of the string,
      // starting with the * matching nothing and finishing with it matching everything
      for (std::string::const_iterator i = matchi; ; ++i)
      {
        if (match_remainder(wild, wildi, match, i))
          return true;
        if (i == match.end())
          break;
      }
      return false;
    }
    case '[':
    {
      // scan for the end of the set using a similar method for avoiding escaped characters
      bool found = false;
      std::string::const_iterator end = wildi + 1;
      for (; !found && end != wild.end(); ++end)
      {
        switch(*end)
        {
        case ']':
        {
          // found the set, now match with its contents excluding the brackets
          if (!match_set(wild.substr(wildi - wild.begin() + 1, end - wildi - 1), *matchi))
            return false;
          found = true;
          break;
        }
        case '\\':
          if (end == wild.end()-1)
            return false;
          ++end;
          break;
        default:
          break;
        }
      }
      if (!found)
        return false;
      // the loop increment has already stepped end past the closing bracket
      ++matchi;
      wildi = end;
      break;
    }
    case '?':
      ++wildi;
      ++matchi;
      break;
    case '\\':
      // a trailing backslash has nothing to escape, so the wildcard is malformed
      if (wildi == wild.end()-1)
        return false;
      ++wildi;
      if (*wildi != *matchi)
        return false;
      ++wildi;
      ++matchi;
      break;
    default:
      if (*wildi != *matchi)
        return false;
      ++wildi;
      ++matchi;
      break;
    }
  }
  // the string may have run out with only *s left in the wildcard - they all match nothing
  while (wildi != wild.end() && *wildi == '*')
    ++wildi;
  return wildi == wild.end() && matchi == match.end();
}

// like all recursions the exported function has a simpler interface than the
// recursive function and is just a 'seed' to the recursion itself
// wild: the wildcard expression
// match: the string to test against it
// returns true if the whole of match is matched by the whole of wild

bool match_wildcard(const std::string& wild, const std::string& match)
{
  return match_remainder(wild, wild.begin(), match, match.begin());
}
323
324 ////////////////////////////////////////////////////////////////////////////////
325
// Split a string into the fields that lie between occurrences of a separator
// str: the text to split
// splitter: the separator text
// returns the fields in order of appearance:
//   - an empty str gives an empty vector
//   - adjacent separators, or a separator at either end of str, give empty fields
//   - an empty splitter can never divide anything, so the whole of str is returned as the only field
std::vector<std::string> split(const std::string& str, const std::string& splitter)
{
  std::vector<std::string> result;
  if (str.empty())
    return result;
  // an empty separator is found at every offset without ever advancing, which
  // would otherwise loop forever adding empty fields
  if (splitter.empty())
  {
    result.push_back(str);
    return result;
  }
  std::string::size_type offset = 0;
  for (;;)
  {
    const std::string::size_type found = str.find(splitter, offset);
    if (found == std::string::npos)
      break;
    result.push_back(str.substr(offset, found-offset));
    offset = found + splitter.size();
  }
  // whatever follows the last separator is the final field
  result.push_back(str.substr(offset));
  return result;
}
348
// Concatenate a sequence of strings into one
// str: the strings to concatenate, in order
// joiner: text placed between each pair of neighbouring elements
// prefix: text placed before the first element
// suffix: text placed after the last element
// returns prefix + elements separated by joiner + suffix; an empty sequence gives prefix + suffix
std::string join (const std::vector<std::string>& str,
                  const std::string& joiner,
                  const std::string& prefix,
                  const std::string& suffix)
{
  std::string joined(prefix);
  std::vector<std::string>::const_iterator element = str.begin();
  if (element != str.end())
  {
    // the first element is not preceded by a joiner
    joined.append(*element);
    for (++element; element != str.end(); ++element)
    {
      joined.append(joiner);
      joined.append(*element);
    }
  }
  joined.append(suffix);
  return joined;
}
363
364 ////////////////////////////////////////////////////////////////////////////////
365
366 std::string display_bytes(long bytes)
367 {
368 std::string result;
369 if (bytes < 0)
370 {
371 result += '-';
372 bytes = -bytes;
373 }
374 static const long kB = 1024l;
375 static const long MB = kB * kB;
376 static const long GB = MB * kB;
377 if (bytes < kB)
378 result += local_dformat("%i", bytes);
379 else if (bytes < (10l * kB))
380 result += local_dformat("%.2fk", ((float)bytes / (float)kB));
381 else if (bytes < (100l * kB))
382 result += local_dformat("%.1fk", ((float)bytes / (float)kB));
383 else if (bytes < MB)
384 result += local_dformat("%.0fk", ((float)bytes / (float)kB));
385 else if (bytes < (10l * MB))
386 result += local_dformat("%.2fM", ((float)bytes / (float)MB));
387 else if (bytes < (100l * MB))
388 result += local_dformat("%.1fM", ((float)bytes / (float)MB));
389 else if (bytes < GB)
390 result += local_dformat("%.0fM", ((float)bytes / (float)MB));
391 else
392 result += local_dformat("%.2fG", ((float)bytes / (float)GB));
393 return result;
394 }
395
396 std::string display_time(time_t seconds)
397 {
398 unsigned minutes = (unsigned)seconds / 60;
399 seconds %= 60;
400 unsigned hours = minutes / 60;
401 minutes %= 60;
402 unsigned days = hours / 24;
403 hours %= 24;
404 unsigned weeks = days / 7;
405 days %= 7;
406 std::string result;
407 if (weeks > 0)
408 {
409 result += unsigned_to_string(weeks, 10, radix_none, 1);
410 result += "w ";
411 }
412 if (!result.empty() || days > 0)
413 {
414 result += unsigned_to_string(days, 10, radix_none, 1);
415 result += "d ";
416 }
417 if (!result.empty() || hours > 0)
418 {
419 result += unsigned_to_string(hours, 10, radix_none, 1);
420 result += ":";
421 }
422 if (!result.empty() || minutes > 0)
423 {
424 if (!result.empty())
425 result += unsigned_to_string(minutes, 10, radix_none, 2);
426 else
427 result += unsigned_to_string(minutes, 10, radix_none, 1);
428 result += ":";
429 }
430 if (!result.empty())
431 result += unsigned_to_string((unsigned)seconds, 10, radix_none, 2);
432 else
433 {
434 result += unsigned_to_string((unsigned)seconds, 10, radix_none, 1);
435 result += "s";
436 }
437 return result;
438 }
439
440 ////////////////////////////////////////////////////////////////////////////////
441
442 } // end namespace stlplus
This page took 0.052179 seconds and 4 git commands to generate.