X-Git-Url: https://git.dogcows.com/gitweb?a=blobdiff_plain;f=src%2Fstlplus%2Fstrings%2Fstring_utilities.cpp;fp=src%2Fstlplus%2Fstrings%2Fstring_utilities.cpp;h=9ff5345c7615958264c7160a92ac36e548e1cb9e;hb=6b0a0d0efafe34d48ab344fca3b479553bd4e62c;hp=0000000000000000000000000000000000000000;hpb=85783316365181491a3e3c0c63659972477cebba;p=chaz%2Fyoink diff --git a/src/stlplus/strings/string_utilities.cpp b/src/stlplus/strings/string_utilities.cpp new file mode 100644 index 0000000..9ff5345 --- /dev/null +++ b/src/stlplus/strings/string_utilities.cpp @@ -0,0 +1,442 @@ +//////////////////////////////////////////////////////////////////////////////// + +// Author: Andy Rushton +// Copyright: (c) Southampton University 1999-2004 +// (c) Andy Rushton 2004-2009 +// License: BSD License, see ../docs/license.html + +//////////////////////////////////////////////////////////////////////////////// +#include "string_utilities.hpp" +#include "string_basic.hpp" +#include +#include +#include +#include + +namespace stlplus +{ + + // added as a local copy to break the dependency on the portability library + static std::string local_dformat(const char* format, ...) throw(std::invalid_argument) + { + std::string formatted; + va_list args; + va_start(args, format); +#ifdef MSWINDOWS + int length = 0; + char* buffer = 0; + for(int buffer_length = 256; ; buffer_length*=2) + { + buffer = (char*)malloc(buffer_length); + if (!buffer) throw std::invalid_argument("string_utilities"); + length = _vsnprintf(buffer, buffer_length-1, format, args); + if (length >= 0) + { + buffer[length] = 0; + formatted += std::string(buffer); + free(buffer); + break; + } + free(buffer); + } +#else + char* buffer = 0; + int length = vasprintf(&buffer, format, args); + if (!buffer) throw std::invalid_argument("string_utilities"); + if (length >= 0) + formatted += std::string(buffer); + free(buffer); +#endif + va_end(args); + if (length < 0) throw std::invalid_argument("string_utilities"); + return formatted; + } + + //////////////////////////////////////////////////////////////////////////////// + + std::string pad(const std::string& str, alignment_t alignment, unsigned width, char padch) + throw(std::invalid_argument) + { + std::string result = str; + switch(alignment) + { + case align_left: + { + unsigned padding = width>str.size() ? width - str.size() : 0; + unsigned i = 0; + while (i++ < padding) + result.insert(result.end(), padch); + break; + } + case align_right: + { + unsigned padding = width>str.size() ? width - str.size() : 0; + unsigned i = 0; + while (i++ < padding) + result.insert(result.begin(), padch); + break; + } + case align_centre: + { + unsigned padding = width>str.size() ? width - str.size() : 0; + unsigned i = 0; + while (i++ < padding/2) + result.insert(result.end(), padch); + i--; + while (i++ < padding) + result.insert(result.begin(), padch); + break; + } + default: + throw std::invalid_argument("invalid alignment value"); + } + return result; + } + + //////////////////////////////////////////////////////////////////////////////// + + std::string trim_left(const std::string& val) + { + std::string result = val; + while (!result.empty() && isspace(result[0])) + result.erase(result.begin()); + return result; + } + + std::string trim_right(const std::string& val) + { + std::string result = val; + while (!result.empty() && isspace(result[result.size()-1])) + result.erase(result.end()-1); + return result; + } + + std::string trim(const std::string& val) + { + std::string result = val; + while (!result.empty() && isspace(result[0])) + result.erase(result.begin()); + while (!result.empty() && isspace(result[result.size()-1])) + result.erase(result.end()-1); + return result; + } + + //////////////////////////////////////////////////////////////////////////////// + + std::string lowercase(const std::string& val) + { + std::string text = val; + for (unsigned i = 0; i < text.size(); i++) + text[i] = tolower(text[i]); + return text; + } + + std::string uppercase(const std::string& val) + { + std::string text = val; + for (unsigned i = 0; i < text.size(); i++) + text[i] = toupper(text[i]); + return text; + } + + //////////////////////////////////////////////////////////////////////////////// + + std::string translate(const std::string& input, const std::string& from_set, const std::string& to_set) + { + std::string result; + for (unsigned i = 0; i < input.size(); i++) + { + char ch = input[i]; + // check to see if the character is in the from set + std::string::size_type found = from_set.find(ch); + if (found == std::string::npos) + { + // not found so just copy across + result += ch; + } + else if (found < to_set.size()) + { + // found and in range so translate + result += to_set[found]; + } + } + return result; + } + + //////////////////////////////////////////////////////////////////////////////// + // WARNING: wheel re-invention follows + // Given that all shells perform wildcard matching, why don't the library writers put it in the C run-time???????? + // The problem: + // * matches any number of characters - this is achieved by matching 1 and seeing if the remainder matches + // if not, try 2 characters and see if the remainder matches etc. + // this must be recursive, not iterative, so that multiple *s can appear in the same wildcard expression + // ? matches exactly one character so doesn't need the what-if approach + // \ escapes special characters such as *, ? and [ + // [] matches exactly one character in the set - the difficulty is the set can contain ranges, e.g [a-zA-Z0-9] + // a set cannot be empty and the ] character can be included by making it the first character + + // function for testing whether a character matches a set + // I can't remember the exact rules and I have no definitive references but: + // a set contains characters, escaped characters (I think) and ranges in the form a-z + // The character '-' can only appear at the start of the set where it is not interpreted as a range + // This is a horrible mess - blame the Unix folks for making a hash of wildcards + + static bool match_set (const std::string& set, char match) + { + // first expand any ranges and remove escape characters to make life more palatable + std::string simple_set; + for (std::string::const_iterator i = set.begin(); i != set.end(); ++i) + { + switch(*i) + { + case '-': + { + if (i == set.begin()) + { + simple_set += *i; + } + else if (i+1 == set.end()) + { + return false; + } + else + { + // found a set. The first character is already in the result, so first remove it (the set might be empty) + simple_set.erase(simple_set.end()-1); + char last = *++i; + for (char ch = *(i-2); ch <= last; ch++) + { + simple_set += ch; + } + } + break; + } + case '\\': + if (i+1 == set.end()) {return false;} + simple_set += *++i; + break; + default: + simple_set += *i; + break; + } + } + std::string::size_type result = simple_set.find(match); + return result != std::string::npos; + } + + // the recursive bit - basically whenever a * is found you recursively call this for each candidate substring match + // until either it succeeds or you run out of string to match + // for each * in the wildcard another level of recursion is created + + static bool match_remainder (const std::string& wild, std::string::const_iterator wildi, + const std::string& match, std::string::const_iterator matchi) + { + //cerr << "match_remainder called at " << *matchi << " with wildcard " << *wildi << endl; + while (wildi != wild.end() && matchi != match.end()) + { + //cerr << "trying to match " << *matchi << " with wildcard " << *wildi << endl; + switch(*wildi) + { + case '*': + { + ++wildi; + ++matchi; + for (std::string::const_iterator i = matchi; i != match.end(); ++i) + { + // deal with * at the end of the wildcard - there is no remainder then + if (wildi == wild.end()) + { + if (i == match.end()-1) + return true; + } + else if (match_remainder(wild, wildi, match, i)) + { + return true; + } + } + return false; + } + case '[': + { + // scan for the end of the set using a similar method for avoiding escaped characters + bool found = false; + std::string::const_iterator end = wildi + 1; + for (; !found && end != wild.end(); ++end) + { + switch(*end) + { + case ']': + { + // found the set, now match with its contents excluding the brackets + if (!match_set(wild.substr(wildi - wild.begin() + 1, end - wildi - 1), *matchi)) + return false; + found = true; + break; + } + case '\\': + if (end == wild.end()-1) + return false; + ++end; + break; + default: + break; + } + } + if (!found) + return false; + ++matchi; + wildi = end; + break; + } + case '?': + ++wildi; + ++matchi; + break; + case '\\': + if (wildi == wild.end()-1) + return false; + ++wildi; + if (*wildi != *matchi) + return false; + ++wildi; + ++matchi; + break; + default: + if (*wildi != *matchi) + return false; + ++wildi; + ++matchi; + break; + } + } + bool result = wildi == wild.end() && matchi == match.end(); + return result; + } + + // like all recursions the exported function has a simpler interface than the + // recursive function and is just a 'seed' to the recursion itself + + bool match_wildcard(const std::string& wild, const std::string& match) + { + return match_remainder(wild, wild.begin(), match, match.begin()); + } + + //////////////////////////////////////////////////////////////////////////////// + + std::vector split(const std::string& str, const std::string& splitter) + { + std::vector result; + if (!str.empty()) + { + for(std::string::size_type offset = 0;;) + { + std::string::size_type found = str.find(splitter, offset); + if (found != std::string::npos) + { + result.push_back(str.substr(offset, found-offset)); + offset = found + splitter.size(); + } + else + { + result.push_back(str.substr(offset, str.size()-offset)); + break; + } + } + } + return result; + } + + std::string join (const std::vector& str, + const std::string& joiner, + const std::string& prefix, + const std::string& suffix) + { + std::string result = prefix; + for (unsigned i = 0; i < str.size(); i++) + { + if (i) result += joiner; + result += str[i]; + } + result += suffix; + return result; + } + + //////////////////////////////////////////////////////////////////////////////// + + std::string display_bytes(long bytes) + { + std::string result; + if (bytes < 0) + { + result += '-'; + bytes = -bytes; + } + static const long kB = 1024l; + static const long MB = kB * kB; + static const long GB = MB * kB; + if (bytes < kB) + result += local_dformat("%i", bytes); + else if (bytes < (10l * kB)) + result += local_dformat("%.2fk", ((float)bytes / (float)kB)); + else if (bytes < (100l * kB)) + result += local_dformat("%.1fk", ((float)bytes / (float)kB)); + else if (bytes < MB) + result += local_dformat("%.0fk", ((float)bytes / (float)kB)); + else if (bytes < (10l * MB)) + result += local_dformat("%.2fM", ((float)bytes / (float)MB)); + else if (bytes < (100l * MB)) + result += local_dformat("%.1fM", ((float)bytes / (float)MB)); + else if (bytes < GB) + result += local_dformat("%.0fM", ((float)bytes / (float)MB)); + else + result += local_dformat("%.2fG", ((float)bytes / (float)GB)); + return result; + } + + std::string display_time(time_t seconds) + { + unsigned minutes = (unsigned)seconds / 60; + seconds %= 60; + unsigned hours = minutes / 60; + minutes %= 60; + unsigned days = hours / 24; + hours %= 24; + unsigned weeks = days / 7; + days %= 7; + std::string result; + if (weeks > 0) + { + result += unsigned_to_string(weeks, 10, radix_none, 1); + result += "w "; + } + if (!result.empty() || days > 0) + { + result += unsigned_to_string(days, 10, radix_none, 1); + result += "d "; + } + if (!result.empty() || hours > 0) + { + result += unsigned_to_string(hours, 10, radix_none, 1); + result += ":"; + } + if (!result.empty() || minutes > 0) + { + if (!result.empty()) + result += unsigned_to_string(minutes, 10, radix_none, 2); + else + result += unsigned_to_string(minutes, 10, radix_none, 1); + result += ":"; + } + if (!result.empty()) + result += unsigned_to_string((unsigned)seconds, 10, radix_none, 2); + else + { + result += unsigned_to_string((unsigned)seconds, 10, radix_none, 1); + result += "s"; + } + return result; + } + + //////////////////////////////////////////////////////////////////////////////// + +} // end namespace stlplus