Browse Source

Clean up UTF-8 parsing and use CLZ if available

zrythm_meson
David Robillard 6 years ago
parent
commit
76e9a6530f
  1. 56
      src/writer.c
  2. 5
      wscript

56
src/writer.c

@@ -158,42 +158,44 @@ sink(const void* buf, size_t len, SerdWriter* writer)
return serd_byte_sink_write(buf, len, &writer->byte_sink);
}
// Parse a UTF-8 character, set *size to the length, and return the code point
// Return the number of bytes in a UTF-8 character
static inline uint32_t
parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
utf8_num_bytes(const uint8_t* utf8)
{
uint32_t c = 0;
if ((utf8[0] & 0x80) == 0) { // Starts with `0'
*size = 1;
c = utf8[0];
} else if ((utf8[0] & 0xE0) == 0xC0) { // Starts with `110'
*size = 2;
c = utf8[0] & 0x1F;
return 1;
}
#ifdef HAVE_BUILTIN_CLZ
return __builtin_clz(~utf8[0] << 24);
#else
if ((utf8[0] & 0xE0) == 0xC0) { // Starts with `110'
return 2;
} else if ((utf8[0] & 0xF0) == 0xE0) { // Starts with `1110'
*size = 3;
c = utf8[0] & 0x0F;
return 3;
} else if ((utf8[0] & 0xF8) == 0xF0) { // Starts with `11110'
*size = 4;
c = utf8[0] & 0x07;
} else {
w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);
*size = 0;
return 0;
return 4;
}
return 0;
#endif
}
size_t i = 0;
uint8_t in = utf8[i++];
#define READ_BYTE() \
in = utf8[i++] & 0x3F; \
c = (c << 6) | in;
switch (*size) {
case 4: READ_BYTE();
case 3: READ_BYTE();
case 2: READ_BYTE();
// Parse a UTF-8 character, set *size to the length, and return the code point.
//
// The length in bytes is taken from the leading byte via utf8_num_bytes().
// If that length is not in the range 1..4, the leading byte cannot start a
// UTF-8 sequence: *size is set to 0 and 0 is returned, so callers can detect
// the error by checking *size.
//
// Continuation bytes are not validated here; only their low 6 bits are used.
// The writer argument is currently unused.
static inline uint32_t
parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
{
    (void)writer;

    switch (*size = utf8_num_bytes(utf8)) {
    case 1: case 2: case 3: case 4:
        break;
    default:
        // Do not leak an out-of-range length to the caller
        *size = 0;
        return 0;
    }

    // Mask off the length prefix of the leading byte.  For multi-byte
    // sequences the mask is one bit wider than the payload, but for a
    // well-formed leading byte that extra bit is the 0 that terminates the
    // prefix, so it does not affect the result.
    const uint32_t lead_mask = (1u << (8u - (unsigned)*size)) - 1u;

    uint32_t c = utf8[0] & lead_mask;
    for (size_t i = 1; i < *size; ++i) {
        // Each continuation byte contributes its low 6 bits
        const uint8_t in = utf8[i] & 0x3F;
        c = (c << 6) | in;
    }
    return c;
}

5
wscript

@@ -78,6 +78,11 @@ def configure(conf):
defines = ['_POSIX_C_SOURCE=200809L'],
mandatory = False)
conf.check(fragment = 'int main() { return __builtin_clz(1); }',
function_name = '__builtin_clz',
define_name = 'HAVE_BUILTIN_CLZ',
mandatory = False)
autowaf.define(conf, 'SERD_VERSION', SERD_VERSION)
autowaf.set_lib_env(conf, 'serd', SERD_VERSION)
conf.write_config_header('serd_config.h', remove=False)

Loading…
Cancel
Save