From 51a912ff3b97de237d734465b9af5b7c8be0a54e Mon Sep 17 00:00:00 2001 From: martii Date: Sun, 15 Dec 2013 14:44:26 +0100 Subject: [PATCH] src/eitd/edvbstring: unbreak isUTF8() Origin commit data ------------------ Branch: ni/coolstream Commit: https://github.com/neutrino-images/ni-neutrino/commit/9ead2b459b5133d9872424527e6d47b0f70d0cc8 Author: martii Date: 2013-12-15 (Sun, 15 Dec 2013) ------------------ No further description and justification available within origin commit message! ------------------ This commit was generated by Migit --- src/eitd/edvbstring.cpp | 44 +++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/eitd/edvbstring.cpp b/src/eitd/edvbstring.cpp index fa0fa1459..003e067bf 100644 --- a/src/eitd/edvbstring.cpp +++ b/src/eitd/edvbstring.cpp @@ -2301,30 +2301,40 @@ int isUTF8(const std::string &string) { unsigned int len=string.size(); - for (unsigned int i=0; i < len; ++i) + for (unsigned int i=0; i < len;) { - if (!(string[i]&0x80)) // normal ASCII + int trailing = 0; + if (string[i] >> 7 == 0) // 0xxxxxxx + { + i++; continue; - if ((string[i] & 0xE0) == 0xC0) // one char following. + } + if (string[i] >> 5 == 6) // 110xxxxx 10xxxxxx { - // first, length check: - if (i+1 >= len) - return 0; // certainly NOT utf-8 - i++; - if ((string[i]&0xC0) != 0x80) - return 0; // no, not UTF-8. - } else if ((string[i] & 0xF0) == 0xE0) + if (++i >= len) + return 0; + trailing = 1; + } + else if (string[i] >> 4 == 14) // 1110xxxx 10xxxxxx 10xxxxxx { - if ((i+1) >= len) + if (++i >= len) return 0; + trailing = 2; + } + else if ((string[i] >> 3) == 30) // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + { + if (++i >= len) + return 0; + trailing = 3; + } else + return 0; + + while (trailing) { + if (i >= len || string[i] >> 6 != 2) + return 0; + trailing--; i++; - if ((string[i]&0xC0) != 0x80) - return 0; - i++; - if ((string[i]&0xC0) != 0x80) - return 0; } } return 1; // can be UTF8 (or pure ASCII, at least no non-UTF-8 8bit characters) } -