This should work fine, but you need to use std::wregex and std::wsmatch. You will need to convert the original string and the regular expression to wide-character Unicode (UTF-32 on Linux, UTF-16 (ish) on Windows) to make it work.
This works for me where the source code is UTF-8:
/// @brief Decodes a UTF-8 byte string into a wide string.
///
/// The result holds one wchar_t per code point when wchar_t is at least
/// 32 bits wide, and UTF-16 code units (surrogate pairs for code points
/// above U+FFFF) when wchar_t is 16 bits wide.
///
/// Malformed input (stray continuation bytes, truncated or overlong
/// sequences, encoded surrogates, values above U+10FFFF) does not abort the
/// conversion: each offending sequence is replaced by U+FFFD.
///
/// @param utf8 UTF-8 encoded input; may be empty.
/// @return The decoded wide string.
inline std::wstring from_utf8(const std::string& utf8)
{
    constexpr char32_t kReplacement = 0xFFFD;

    std::wstring out;
    out.reserve(utf8.size()); // upper bound: never more units than input bytes

    const std::string::size_type size = utf8.size();
    std::string::size_type i = 0;
    while (i < size)
    {
        const unsigned char lead = static_cast<unsigned char>(utf8[i++]);

        char32_t cp = kReplacement; // stays U+FFFD for an invalid lead byte
        char32_t min_cp = 0;        // smallest value allowed for this length
        int trailing = 0;           // continuation bytes still expected

        if (lead < 0x80)
        {
            cp = lead;
        }
        else if (lead >= 0xC2 && lead <= 0xDF) // 0xC0/0xC1 are always overlong
        {
            cp = lead & 0x1Fu;
            min_cp = 0x80;
            trailing = 1;
        }
        else if ((lead & 0xF0u) == 0xE0u)
        {
            cp = lead & 0x0Fu;
            min_cp = 0x800;
            trailing = 2;
        }
        else if (lead >= 0xF0 && lead <= 0xF4) // 0xF5+ would exceed U+10FFFF
        {
            cp = lead & 0x07u;
            min_cp = 0x10000;
            trailing = 3;
        }

        bool well_formed = true;
        for (int k = 0; k < trailing; ++k)
        {
            // Do not consume a byte that is not a continuation byte: it may
            // be the lead byte of the next (valid) sequence.
            if (i >= size || (static_cast<unsigned char>(utf8[i]) & 0xC0u) != 0x80u)
            {
                well_formed = false;
                break;
            }
            cp = (cp << 6) | (static_cast<unsigned char>(utf8[i]) & 0x3Fu);
            ++i;
        }

        const bool is_surrogate = cp >= 0xD800 && cp <= 0xDFFF;
        if (!well_formed
            || (trailing > 0 && (cp < min_cp || cp > 0x10FFFF || is_surrogate)))
        {
            cp = kReplacement;
        }

        if (sizeof(wchar_t) >= 4 || cp < 0x10000)
        {
            out.push_back(static_cast<wchar_t>(cp));
        }
        else
        {
            // 16-bit wchar_t: split into a UTF-16 surrogate pair.
            const char32_t offset = cp - 0x10000;
            out.push_back(static_cast<wchar_t>(0xD800 + (offset >> 10)));
            out.push_back(static_cast<wchar_t>(0xDC00 + (offset & 0x3FFu)));
        }
    }
    return out;
}
/// @brief Encodes a wide string as UTF-8.
///
/// When wchar_t is at least 32 bits wide each element is treated as one code
/// point; when it is 16 bits wide the input is treated as UTF-16 and valid
/// surrogate pairs are combined into a single code point.
///
/// Values that are not Unicode scalar values (unpaired surrogates, anything
/// above U+10FFFF) are encoded as U+FFFD instead of aborting the conversion.
///
/// @param ws Wide-character input; may be empty.
/// @return The UTF-8 encoded byte string.
inline std::string to_utf8(const std::wstring& ws)
{
    constexpr char32_t kReplacement = 0xFFFD;
    constexpr bool kIsUtf16 = sizeof(wchar_t) < 4;

    std::string out;
    out.reserve(ws.size()); // lower bound; grows for non-ASCII input

    const std::wstring::size_type size = ws.size();
    for (std::wstring::size_type i = 0; i < size; ++i)
    {
        char32_t cp = static_cast<char32_t>(ws[i]);

        // UTF-16 only: merge a high surrogate with the low surrogate after it.
        if (kIsUtf16 && cp >= 0xD800 && cp <= 0xDBFF && i + 1 < size)
        {
            const char32_t low = static_cast<char32_t>(ws[i + 1]);
            if (low >= 0xDC00 && low <= 0xDFFF)
            {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                ++i;
            }
        }

        if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
        {
            cp = kReplacement;
        }

        if (cp < 0x80)
        {
            out.push_back(static_cast<char>(cp));
        }
        else if (cp < 0x800)
        {
            out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
            out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
        }
        else if (cp < 0x10000)
        {
            out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
            out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
        }
        else
        {
            out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
            out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
        }
    }
    return out;
}
/// Demo: finds the first run of non-ASCII characters in a UTF-8 string by
/// matching on its wide-character form, then prints that run as UTF-8.
int main()
{
    // Both the subject and the pattern start out as narrow strings and are
    // converted with from_utf8 before being handed to the wide regex.
    const std::wstring subject = from_utf8(std::string("john.doe@神谕.com"));
    const std::wstring pattern = from_utf8(std::string("[\\u0080-\\uDB7F]+"));

    const std::wregex non_ascii_run(pattern);

    std::wsmatch hit;
    if (!std::regex_search(subject, hit, non_ascii_run))
    {
        return 0; // nothing matched: print nothing
    }

    // Convert the whole match (sub-match 0) back to UTF-8 for narrow output.
    const std::wstring whole_match = hit.str(0);
    std::cout << to_utf8(whole_match) << '\n';
    return 0;
}
Output:
神谕
. UTF, strong > .
: , :
++?