31#include "Exception.hpp"
50 const unsigned char ch =
static_cast<unsigned char>(*str);
51 if ((ch & 0xF0) == 0xE0) {
53 }
else if ((ch & 0x80) == 0x00) {
55 }
else if ((ch & 0xE0) == 0xC0) {
57 }
else if ((ch & 0xF8) == 0xF0) {
59 }
else if ((ch & 0xFC) == 0xF8) {
61 }
else if ((ch & 0xFE) == 0xFC) {
82 const char* candidate = str - 1;
84 while (distance < 6) {
85 const unsigned char ch =
static_cast<unsigned char>(*candidate);
86 if ((ch & 0xC0) != 0x80) {
94 if (length == distance) {
122 while (*str !=
'\0') {
131 while (i < charLen && str[i] !=
'\0') {
160 return ch ==
'\0' || ch ==
'\n' || ch ==
'\r';
166 static std::string
FromSubstr(
const char* str,
size_t length) {
168 newStr.resize(length);
169 strncpy(newStr.data(), str, length);
178 while (byteLength > 0) {
192 static std::string
TruncateUTF8(
const char* str,
size_t maxByteLength) {
193 std::string wordTrunc;
196 const char* pStr = str;
199 if (len + charLength > maxByteLength) {
215 static void ReplaceAll(std::string& str,
const char* from,
const char* to) {
216 std::string::size_type pos = 0;
217 std::string::size_type fromLen = strlen(from);
218 std::string::size_type toLen = strlen(to);
219 while ((pos = str.find(from, pos)) != std::string::npos) {
220 str.replace(pos, fromLen, to);
228 static std::string
Join(
const std::vector<std::string>& strings,
229 const std::string& separator) {
230 std::ostringstream buffer;
232 for (
const auto& str : strings) {
245 static std::string
Join(
const std::vector<std::string>& strings) {
246 std::ostringstream buffer;
247 for (
const auto& str : strings) {
253 static void GetByteMap(
const char* str,
const size_t utf8Length,
254 std::vector<size_t>* byteMap) {
255 if (byteMap->size() < utf8Length) {
256 byteMap->resize(utf8Length);
258 const char* pstr = str;
259 for (
size_t i = 0; i < utf8Length; i++) {
260 (*byteMap)[i] = pstr - str;
261 pstr = NextChar(pstr);
266 static std::wstring GetPlatformString(
const std::string& str) {
// Returns the given string unchanged (as a copy).
270 static std::string GetPlatformString(
const std::string& str) {
return str; }
274 static std::string U16ToU8(
const std::wstring& wstr) {
276 int length =
static_cast<int>(wstr.length());
277 int convcnt = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, NULL, 0,
281 WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, &ret[0], convcnt,
287 static std::wstring U8ToU16(
const std::string& str) {
289 int length =
static_cast<int>(str.length());
290 int convcnt = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), length, NULL, 0);
293 MultiByteToWideChar(CP_UTF8, 0, str.c_str(), length, &ret[0], convcnt);
Definition Exception.hpp:77
UTF8 std::string utilities.
Definition UTF8Util.hpp:38
static bool IsLineEndingOrFileEnding(const char ch)
Returns true if the character is a line ending or end of file.
Definition UTF8Util.hpp:159
static size_t PrevCharLength(const char *str)
Returns the length in bytes of the previous UTF-8 character.
Definition UTF8Util.hpp:81
static std::string FromSubstr(const char *str, size_t length)
Copies a substring with given length to a new string.
Definition UTF8Util.hpp:166
static void ReplaceAll(std::string &str, const char *from, const char *to)
Replaces all occurrences of a pattern in a std::string in place.
Definition UTF8Util.hpp:215
static void SkipUtf8Bom(FILE *fp)
Detects a UTF-8 BOM and skips it.
Definition UTF8Util.cpp:23
static size_t NextCharLengthNoException(const char *str)
Returns the length in bytes of the next UTF-8 character.
Definition UTF8Util.hpp:49
static bool NotShorterThan(const char *str, size_t byteLength)
Returns true if the given string is at least as long as the given length in bytes.
Definition UTF8Util.hpp:177
static std::string Join(const std::vector< std::string > &strings)
Joins a vector of std::string into a single std::string.
Definition UTF8Util.hpp:245
static std::string TruncateUTF8(const char *str, size_t maxByteLength)
Truncates a UTF-8 string to at most the given length in bytes.
Definition UTF8Util.hpp:192
static size_t Length(const char *str)
Returns the UTF8 length of a null-terminated string.
Definition UTF8Util.hpp:120
static const char * FindNextInline(const char *str, const char ch)
Finds a character in the same line.
Definition UTF8Util.hpp:149
static size_t NextCharLength(const char *str)
Returns the length in bytes of the next UTF-8 character.
Definition UTF8Util.hpp:70
static std::string Join(const std::vector< std::string > &strings, const std::string &separator)
Joins a vector of std::string into a single std::string with a separator.
Definition UTF8Util.hpp:228
static const char * PrevChar(const char *str)
Moves the char* pointer back to just before the previous UTF-8 character.
Definition UTF8Util.hpp:110
static const char * NextChar(const char *str)
Returns the char* pointer advanced past the next UTF-8 character.
Definition UTF8Util.hpp:103