Make utf8_get_char() and utf8_next_char() actually do what their names say

This commit is contained in:
Chris Angelico 2014-06-08 06:10:45 +10:00
parent bc990dad9a
commit 30d1bad33f
1 changed file with 16 additions and 2 deletions

View File

@@ -66,13 +66,27 @@ STATIC const uint8_t attr[] = {
}; };
unichar utf8_get_char(const char *s) { unichar utf8_get_char(const char *s) {
return *s; unichar ord = *s++;
if (!UTF8_IS_NONASCII(ord)) return ord;
ord &= 0x7F;
for (unichar mask = 0x40; ord & mask; mask >>= 1) {
ord &= ~mask;
}
while (UTF8_IS_CONT(*s)) {
ord = (ord << 6) | (*s++ & 0x3F);
}
return ord;
} }
char *utf8_next_char(const char *s) { char *utf8_next_char(const char *s) {
return (char*)(s + 1); ++s;
while (UTF8_IS_CONT(*s)) {
++s;
}
return (char *)s;
} }
// Be aware: These unichar_is* functions are actually ASCII-only!
bool unichar_isspace(unichar c) { bool unichar_isspace(unichar c) {
return c < 128 && (attr[c] & FL_SPACE) != 0; return c < 128 && (attr[c] & FL_SPACE) != 0;
} }