X-Git-Url: https://git.dogcows.com/gitweb?p=chaz%2Fyoink;a=blobdiff_plain;f=src%2Fmoof%2Fstring.cc;fp=src%2Fmoof%2Fstring.cc;h=61bbf8fa6e77982a92b2c4fdf1542a24ab96a42b;hp=a2c27bb25cf3b670d9b04c66c498c55e53530f9e;hb=6f1b787a10d8ab1a3117a4b8c004dd2d90599608;hpb=c143f7e806766a73cd69dc6e084e977641019ce6 diff --git a/src/moof/string.cc b/src/moof/string.cc index a2c27bb..61bbf8f 100644 --- a/src/moof/string.cc +++ b/src/moof/string.cc @@ -11,129 +11,308 @@ #include +#include + #include "ConvertUTF.h" +#include "script.hh" #include "string.hh" namespace moof { -// TODO this code is ugly - wstring multi_to_wide(const string& multi) { - size_t length = multi.length(); + typedef boost::shared_array buffer; if (sizeof(wchar_t) == 2) { - wchar_t* wide = new wchar_t[length + 1]; - - const UTF8* srcStart = reinterpret_cast(multi.c_str()); - const UTF8* srcEnd = srcStart + length; - UTF16* targetStart = reinterpret_cast(wide); - UTF16* targetEnd = targetStart + length+1; + size_t length = multi.length(); + buffer wide(new wchar_t[length + 1]); + const UTF8* src1 = (const UTF8*)multi.c_str(); + const UTF8* src2 = src1 + length; + UTF16* dst1 = (UTF16*)wide.get(); + UTF16* dst2 = dst1 + length+1; - ConversionResult res = ConvertUTF8toUTF16(&srcStart, srcEnd, - &targetStart, targetEnd, lenientConversion); - if (res != conversionOK) + if (ConvertUTF8toUTF16(&src1, src2, + &dst1, dst2, lenientConversion) != conversionOK) { - delete[] wide; - throw std::runtime_error("bad conversion from multi to wide characters"); + throw std::runtime_error("bad string conversion"); } - *targetStart = 0; - wstring convertedStr(wide); - delete[] wide; - - return convertedStr; + *dst1 = 0; + wstring str(wide.get()); + return str; } else if (sizeof(wchar_t) == 4) { - wchar_t* wide = new wchar_t[length]; - - const UTF8* srcStart = reinterpret_cast(multi.c_str()); - const UTF8* srcEnd = srcStart + length; - UTF32* targetStart = reinterpret_cast(wide); - UTF32* targetEnd = targetStart + length; + size_t length = multi.length(); + buffer wide(new wchar_t[length + 1]); + const UTF8* src1 = (const UTF8*)multi.c_str(); + const UTF8* src2 = src1 + length; + UTF32* dst1 = (UTF32*)wide.get(); + UTF32* dst2 = dst1 + length+1; - ConversionResult res = ConvertUTF8toUTF32(&srcStart, srcEnd, - &targetStart, targetEnd, lenientConversion); - if (res != conversionOK) + if (ConvertUTF8toUTF32(&src1, src2, + &dst1, dst2, lenientConversion) != conversionOK) { - delete[] wide; - throw std::runtime_error("bad conversion from multi to wide characters"); + throw std::runtime_error("bad string conversion"); } - *targetStart = 0; - wstring convertedStr(wide); - delete[] wide; - - return convertedStr; + *dst1 = 0; + wstring str(wide.get()); + return str; } else { throw std::runtime_error("unknown size of wide characters"); } - return L""; } string wide_to_multi(const wstring& wide) { - size_t length = wide.length(); + typedef boost::shared_array buffer; if (sizeof(wchar_t) == 2) { - size_t multiLength = 3 * length + 1; - char* multi = new char[multiLength]; - - const UTF16* srcStart = reinterpret_cast(wide.c_str()); - const UTF16* srcEnd = srcStart + length; - UTF8* targetStart = reinterpret_cast(multi); - UTF8* targetEnd = targetStart + multiLength; + size_t length = wide.length(); + size_t multi_length = 3 * length + 1; + buffer multi(new char[multi_length]); + const UTF16* src1 = (const UTF16*)wide.c_str(); + const UTF16* src2 = src1 + length; + UTF8* dst1 = (UTF8*)multi.get(); + UTF8* dst2 = dst1 + multi_length; - ConversionResult res = ConvertUTF16toUTF8(&srcStart, srcEnd, - &targetStart, targetEnd, lenientConversion); - if (res != conversionOK) + if (ConvertUTF16toUTF8(&src1, src2, + &dst1, dst2, lenientConversion) != conversionOK) { - delete[] multi; - throw std::runtime_error("bad conversion from wide to multi-characters"); + throw std::runtime_error("bad string conversion"); } - *targetStart = 0; - string convertedStr(multi); - delete[] multi; - - return convertedStr; + *dst1 = 0; + string str(multi.get()); + return str; } else if (sizeof(wchar_t) == 4) { - size_t multiLength = 4 * length + 1; - char* multi = new char[multiLength]; + size_t length = wide.length(); + size_t multi_length = 4 * length + 1; + buffer multi(new char[multi_length]); + const UTF32* src1 = (const UTF32*)wide.c_str(); + const UTF32* src2 = src1 + length; + UTF8* dst1 = (UTF8*)multi.get(); + UTF8* dst2 = dst1 + multi_length; - const UTF32* srcStart = reinterpret_cast(wide.c_str()); - const UTF32* srcEnd = srcStart + length; - UTF8* targetStart = reinterpret_cast(multi); - UTF8* targetEnd = targetStart + multiLength; - - ConversionResult res = ConvertUTF32toUTF8(&srcStart, srcEnd, - &targetStart, targetEnd, lenientConversion); - if (res != conversionOK) + if (ConvertUTF32toUTF8(&src1, src2, + &dst1, dst2, lenientConversion) != conversionOK) { - delete[] multi; - throw std::runtime_error("bad conversion from wide to multi-characters"); + throw std::runtime_error("bad string conversion"); } - *targetStart = 0; - string convertedStr(multi); - delete[] multi; - - return convertedStr; + *dst1 = 0; + string str(multi.get()); + return str; } else { throw std::runtime_error("unknown size of wide characters"); } - return ""; +} + + +static script& regex_script() +{ + static script script; + static bool init = true; + if (init) + { + script.import_string_library(); + script.globals().push_field("string").push_field("match"); + script.globals().set_field("match"); + script.top().push_field("gmatch"); + script.globals().set_field("gmatch"); + script.top().push_field("gsub"); + script.globals().set_field("gsub"); + script.push_nil(); + script.globals().set_field("string"); + script.pop(); + init = false; + } + return script; +} + + +regex::regex(const string& pattern) +{ + regex::pattern(pattern); +} + +regex::regex(const string& pattern, const string& source) +{ + regex::pattern(pattern); + match(source); +} + +regex::~regex() +{ + script& script = regex_script(); + + script.push_pointer(this); + script.push_nil(); + script.globals().set_field(); + script.push_pointer(this); + script.push_nil(); + script.registry().set_field(); +} + + +string regex::pattern() const +{ + script& script = regex_script(); + script.push_pointer(this); + script::slot saved = script.registry().push_field(); + + string pattern; + saved.get(pattern); + + saved.pop(); + return pattern; +} + +void regex::pattern(const string& pattern) +{ + script& script = regex_script(); + script.push_pointer(this); + script.push(pattern); + script.registry().set_field(); +} + + +void regex::match(const string& source) +{ + script& script = regex_script(); + script.push_pointer(this); + + script.globals().push_field("gmatch"); + script.push(source); + script.push_pointer(this); + script.registry().push_field(); + script.call(2, 1); + + script.globals().set_field(); +} + +bool regex::get(string& match) +{ + script& script = regex_script(); + script.push_pointer(this); + script::slot value = script.globals().push_field(); + if (!value.is_function()) + { + script.clear_stack(); + return false; + } + + script.call(0, 1); + bool result = value.get(match); + script.pop(); + return result; +} + +bool regex::get(std::vector& captures) +{ + script& script = regex_script(); + script.push_pointer(this); + script::slot value = script.globals().push_field(); + if (!value.is_function()) + { + script.clear_stack(); + return false; + } + + script.call(); + captures.clear(); + + while (value.is_string()) + { + captures.resize(captures.size() + 1); + value.get(captures.back()); + ++value.index; + } + + script.clear_stack(); + return 0 < captures.size(); +} + + +bool regex::match(string& match, + const string& pattern, + const string& source, + int position) +{ + script& script = regex_script(); + + script::slot value = script.globals().push_field("match"); + script.push(source); + script.push(pattern); + ++position; // Lua indices count from one. + script.push(position); + script.call(3, 1); + + bool result = value.get(match); + script.clear_stack(); + return result; +} + +bool regex::match(std::vector& captures, + const string& pattern, + const string& source, + int position) +{ + script& script = regex_script(); + + script::slot value = script.globals().push_field("match"); + script.push(source); + script.push(pattern); + ++position; // Lua indices count from one. + script.push(position); + script.call(3); + + captures.clear(); + + while (value.is_string()) + { + captures.resize(captures.size() + 1); + value.get(captures.back()); + ++value.index; + } + + script.clear_stack(); + return 0 < captures.size(); +} + + +int regex::sub(string& substitution, + const string& pattern, + const string& source, + const string& replacement) +{ + script& script = regex_script(); + + script::slot value = script.globals().push_field("gsub"); + script.push(source); + script.push(pattern); + script.push(replacement); + script.call(3, 2); + + value.get(substitution); + + ++value.index; + int count = 0; + value.get(count); + + script.clear_stack(); + return count; }