Browse Source

Rework character reading functions to support reading multi-byte characters (take a string dest parameter instead of returning uchar).

Escape ntriples output.
Pass all good read tests with output verification.


git-svn-id: http://svn.drobilla.net/serd/trunk@8 490d8e77-9747-427b-9fa3-0b8f29cee8a0
zrythm_meson
David Robillard 12 years ago
parent
commit
0a62fc5f6a
  1. 8
      doc/reference.doxygen.in
  2. 105
      serd/serd.h
  3. 46
      src/namespaces.c
  4. 287
      src/reader.c
  5. 84
      src/serdi.c
  6. 65
      src/string.c
  7. 49
      src/uri.c
  8. 170
      src/write.c
  9. 8
      wscript

8
doc/reference.doxygen.in

@ -270,7 +270,7 @@ SUBGROUPING = YES @@ -270,7 +270,7 @@ SUBGROUPING = YES
# be useful for C code in case the coding convention dictates that all compound
# types are typedef'ed and only the typedef is referenced, never the tag name.
TYPEDEF_HIDES_STRUCT = NO
TYPEDEF_HIDES_STRUCT = YES
# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
# determine which symbols to keep in memory and which to flush to disk.
@ -297,7 +297,7 @@ SYMBOL_CACHE_SIZE = 0 @@ -297,7 +297,7 @@ SYMBOL_CACHE_SIZE = 0
# Private class members and static file members will be hidden unless
# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
EXTRACT_ALL = NO
EXTRACT_ALL = YES
# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
# will be included in the documentation.
@ -480,14 +480,14 @@ SHOW_DIRECTORIES = NO @@ -480,14 +480,14 @@ SHOW_DIRECTORIES = NO
# This will remove the Files entry from the Quick Index and from the
# Folder Tree View (if specified). The default is YES.
SHOW_FILES = YES
SHOW_FILES = NO
# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
# Namespaces page.
# This will remove the Namespaces entry from the Quick Index
# and from the Folder Tree View (if specified). The default is YES.
SHOW_NAMESPACES = YES
SHOW_NAMESPACES = NO
# The FILE_VERSION_FILTER tag can be used to specify a program or script that
# doxygen should invoke to get the current version for each file (typically from

105
serd/serd.h

@ -49,6 +49,10 @@ @@ -49,6 +49,10 @@
* @{
*/
typedef struct SerdNamespacesImpl* SerdNamespaces;
typedef struct SerdReaderImpl* SerdReader;
/** RDF syntax */
typedef enum {
SERD_TURTLE = 1,
@ -57,56 +61,31 @@ typedef enum { @@ -57,56 +61,31 @@ typedef enum {
/** Type of RDF node. */
typedef enum {
BLANK = 1,
URI = 2,
QNAME = 3,
LITERAL = 4
BLANK = 1, ///< Blank node (resource with no URI)
URI = 2, ///< URI (universal identifier)
QNAME = 3, ///< CURIE/QName (URI shortened with a namespace)
LITERAL = 4 ///< Literal string (with optional lang or datatype)
} SerdNodeType;
/** @name String
* @{
*/
/** Measured UTF-8 string. */
typedef struct {
size_t n_bytes;
size_t n_chars;
uint8_t buf[];
} SerdString;
/** Create a new UTF-8 string from @a utf8. */
SERD_API
SerdString*
serd_string_new(const uint8_t* utf8);
/** Copy @a string. */
SERD_API
SerdString*
serd_string_copy(const SerdString* string);
/** @} */
/** @name URIs
* @{
*/
/** Range of memory. */
/* Range of memory. */
typedef struct {
const uint8_t* buf;
size_t len;
} SerdRange;
/** Parsed URI. */
/* Parsed URI. */
typedef struct {
SerdRange scheme;
SerdRange authority;
SerdRange path_base;
SerdRange path;
SerdRange query;
SerdRange fragment;
bool base_uri_has_authority;
SerdRange scheme; ///< Scheme
SerdRange authority; ///< Authority
SerdRange path_base; ///< Path prefix if relative
SerdRange path; ///< Path suffix
SerdRange query; ///< Query
SerdRange fragment; ///< Fragment
bool base_uri_has_authority; ///< True iff base URI has authority
} SerdURI;
/** Return true iff @a utf8 is a relative URI string. */
@ -129,11 +108,52 @@ SERD_API @@ -129,11 +108,52 @@ SERD_API
bool
serd_uri_write(const SerdURI* uri, FILE* file);
/** Sink function for raw string output. */
typedef size_t (*SerdSink)(const uint8_t* buf, size_t len, void* stream);
/** Serialise @a uri with a series of calls to @a sink. */
SERD_API
size_t
serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream);
/** @} */
/** @name String
* @{
*/
/** Measured UTF-8 string. */
typedef struct {
size_t n_bytes; ///< Size in bytes including trailing null byte
size_t n_chars; ///< Length in characters
uint8_t buf[]; ///< Buffer
} SerdString;
/** Create a new UTF-8 string from @a utf8. */
SERD_API
SerdString*
serd_string_new(const uint8_t* utf8);
/** Copy @a string. */
SERD_API
SerdString*
serd_string_copy(const SerdString* string);
/** Serialise @a uri to a string. */
SERD_API
SerdString*
serd_uri_serialise(const SerdURI* uri,
SerdURI* out);
serd_string_new_from_uri(const SerdURI* uri,
SerdURI* out);
SERD_API
bool
serd_write_node(FILE* file,
const SerdURI* base_uri,
SerdNamespaces ns,
SerdNodeType type,
const SerdString* str,
const SerdString* datatype,
const SerdString* lang);
/** @} */
@ -142,9 +162,6 @@ serd_uri_serialise(const SerdURI* uri, @@ -142,9 +162,6 @@ serd_uri_serialise(const SerdURI* uri,
* @{
*/
/** Reader. */
typedef struct SerdReaderImpl* SerdReader;
/** Handler for base URI changes. */
typedef bool (*SerdBaseHandler)(void* handle,
const SerdString* uri);
@ -194,8 +211,6 @@ serd_reader_free(SerdReader reader); @@ -194,8 +211,6 @@ serd_reader_free(SerdReader reader);
* @{
*/
typedef struct SerdNamespacesImpl* SerdNamespaces;
/** Create a new namespaces dictionary. */
SERD_API
SerdNamespaces

46
src/namespaces.c

@ -32,48 +32,6 @@ struct SerdNamespacesImpl { @@ -32,48 +32,6 @@ struct SerdNamespacesImpl {
size_t n_namespaces;
};
/**
 * Count the characters in the null-terminated UTF-8 string @a utf8.
 *
 * Each byte that is not a continuation byte (top two bits `10') is counted
 * as the start of one character.  If @a out_n_bytes is non-NULL, it is set to
 * the size of the string in bytes, including the terminating null byte.
 *
 * @return The number of characters counted.
 */
static inline size_t
utf8_strlen(const uint8_t* utf8, size_t* out_n_bytes)
{
	const uint8_t* cursor = utf8;
	size_t         count  = 0;
	while (*cursor != 0) {
		// Top two bits `10' mark a continuation byte, which is not counted
		if ((*cursor >> 6) != 0x2) {
			++count;
		}
		++cursor;
	}
	if (out_n_bytes) {
		// Bytes walked, plus one for the terminating null
		*out_n_bytes = (size_t)(cursor - utf8) + 1;
	}
	return count;
}
/**
 * Create a new measured UTF-8 string from the null-terminated @a utf8.
 *
 * The bytes of @a utf8, including the terminating null byte, are copied into
 * a single block obtained from malloc() that also holds the byte and
 * character counts.
 *
 * @return The new string, or NULL if memory could not be allocated.
 */
SERD_API
SerdString*
serd_string_new(const uint8_t* utf8)
{
	size_t       n_bytes = 0;
	const size_t n_chars = utf8_strlen(utf8, &n_bytes);

	SerdString* const str = malloc(sizeof(SerdString) + n_bytes);
	if (!str) {
		// Out of memory: report failure instead of writing through NULL
		return NULL;
	}

	str->n_bytes = n_bytes;
	str->n_chars = n_chars;
	memcpy(str->buf, utf8, n_bytes);
	return str;
}
/**
 * Copy @a s, including its byte/character counts and buffer contents.
 *
 * @return A newly malloc()ed copy, or NULL if @a s is NULL or memory could
 * not be allocated.
 */
SERD_API
SerdString*
serd_string_copy(const SerdString* s)
{
	if (!s) {
		return NULL;
	}

	// Header and trailing buffer live in one block, so one copy suffices
	const size_t total = sizeof(SerdString) + s->n_bytes;
	SerdString* const copy = malloc(total);
	if (copy) {
		memcpy(copy, s, total);
	}
	return copy;  // NULL here means the allocation failed
}
SERD_API
SerdNamespaces
serd_namespaces_new()
@ -148,9 +106,9 @@ serd_namespaces_expand(SerdNamespaces ns, @@ -148,9 +106,9 @@ serd_namespaces_expand(SerdNamespaces ns,
SerdNamespace* const record = serd_namespaces_find(ns, qname->buf, colon - qname->buf);
if (record) {
uri_prefix->buf = record->uri->buf;
uri_prefix->len = record->uri->n_bytes;
uri_prefix->len = record->uri->n_bytes - 1;
uri_suffix->buf = colon + 1;
uri_suffix->len = qname->n_bytes - (colon - qname->buf) - 1;
uri_suffix->len = qname->n_bytes - (colon - qname->buf) - 2;
return true;
}
return false;

287
src/reader.c

@ -140,22 +140,6 @@ readahead(SerdReader parser, uint8_t* pre, int n) @@ -140,22 +140,6 @@ readahead(SerdReader parser, uint8_t* pre, int n)
return true;
}
/**
 * Return the length in bytes of the UTF-8 sequence introduced by @a b0.
 *
 * The length is taken from the leading bits of the first byte.  Returns 0 if
 * @a b0 does not introduce a sequence of 1 to 4 bytes (for example, if it is
 * a continuation byte).
 */
static inline unsigned
utf8_char_len(const uint8_t b0)
{
	if (b0 < 0x80) {  // 0xxxxxxx
		return 1;
	}
	if ((b0 >> 5) == 0x06) {  // 110xxxxx
		return 2;
	}
	if ((b0 >> 4) == 0x0E) {  // 1110xxxx
		return 3;
	}
	if ((b0 >> 3) == 0x1E) {  // 11110xxx
		return 4;
	}
	return 0;
}
static inline uchar
peek_utf8_char(SerdReader parser, unsigned* n_bytes)
{
@ -334,52 +318,82 @@ read_hex(SerdReader parser) @@ -334,52 +318,82 @@ read_hex(SerdReader parser)
}
}
static inline uchar
read_hex_escape(SerdReader parser, unsigned length)
static inline bool
read_hex_escape(SerdReader parser, unsigned length, Ref dest)
{
uchar ret = 0;
uint8_t chars[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
uint8_t code[4] = { 0, 0, 0, 0 };
uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (unsigned i = 0; i < length; ++i) {
chars[i] = read_hex(parser);
}
sscanf((const char*)chars, "%X", (uint32_t*)code);
const uint32_t code_num = *(uint32_t*)code;
if (code_num < 0x80) {
fprintf(stderr, "1 byte UTF-8 escape\n");
return code[0];
} else if (code_num < 0x800) {
fprintf(stderr, "2 byte UTF-8 escape\n");
fprintf(stderr, "B0 %X\n", code[0]);
fprintf(stderr, "B1 %X\n", code[1]);
fprintf(stderr, "B2 %X\n", code[2]);
fprintf(stderr, "B3 %X\n", code[3]);
ret = ((0xC0 + ((code[3] & 0x1F) << 2) + ((code[4] & 0xC0) >> 6)) << 8)
+ (code[4] & 0x3F);
fprintf(stderr, "RET %X\n", ret);
} else if (code_num < 0x10000) {
fprintf(stderr, "3 byte UTF-8 escape\n");
buf[i] = read_hex(parser);
}
uint32_t c;
sscanf((const char*)buf, "%X", &c);
unsigned size = 0;
if (c < 0x00000080) {
size = 1;
} else if (c < 0x00000800) {
size = 2;
} else if (c < 0x00010000) {
size = 3;
} else if (c < 0x00200000) {
size = 4;
} else if (c < 0x04000000) {
size = 5;
} else if (c < 0x80000000) {
size = 6;
} else {
fprintf(stderr, "4 byte UTF-8 escape\n");
return false;
}
return ret;
// Build output in buf
// (Note # of bytes = # of leading 1 bits in first byte)
switch (size) {
case 6:
buf[5] = 0x80 | (uint8_t)(c & 0x3F);
c >>= 6;
c |= (4 << 24); // set bit 2
case 5:
buf[4] = 0x80 | (uint8_t)(c & 0x3F);
c >>= 6;
c |= (8 << 18); // set bit 3
case 4:
buf[3] = 0x80 | (uint8_t)(c & 0x3F);
c >>= 6;
c |= (16 << 12); // set bit 4
case 3:
buf[2] = 0x80 | (uint8_t)(c & 0x3F);
c >>= 6;
c |= (32 << 6); // set bit 5
case 2:
buf[1] = 0x80 | (uint8_t)(c & 0x3F);
c >>= 6;
c |= 0xC0; // set bits 6 and 7
case 1:
buf[0] = (uint8_t)c;
}
for (unsigned i = 0; i < size; ++i) {
push_char(parser, dest, buf[i]);
}
return true;
}
static inline uchar
character_escape(SerdReader parser, const uchar esc)
static inline bool
read_character_escape(SerdReader parser, Ref dest)
{
switch (esc) {
switch (peek_char(parser)) {
case '\\':
return eat_char(parser, '\\');
push_char(parser, dest, eat_char(parser, '\\'));
return true;
case 'u':
eat_char(parser, esc);
return read_hex_escape(parser, 4);
eat_char(parser, 'u');
return read_hex_escape(parser, 4, dest);
case 'U':
eat_char(parser, esc);
return read_hex_escape(parser, 8);
eat_char(parser, 'U');
return read_hex_escape(parser, 8, dest);
default:
return 0;
return false;
}
}
@ -387,117 +401,124 @@ character_escape(SerdReader parser, const uchar esc) @@ -387,117 +401,124 @@ character_escape(SerdReader parser, const uchar esc)
// | '\U' hex hex hex hex hex hex hex hex
// | '\\'
// | [#x20-#x5B] | [#x5D-#x10FFFF]
static inline uchar
read_character(SerdReader parser)
static inline bool
read_character(SerdReader parser, Ref dest)
{
const uchar c = peek_char(parser);
uchar esc;
switch (c) {
case '\\':
eat_char(parser, '\\');
esc = character_escape(parser, peek_char(parser));
if (esc) {
return esc;
if (read_character_escape(parser, dest)) {
return true;
} else {
return error(parser, "illegal escape `\\%c'\n", esc);
return error(parser, "invalid escape `\\%c'\n", peek_char(parser));
}
default:
if (in_range(c, 0x20, 0x5B) || in_range(c, 0x5D, 0x10FFF)) {
return eat_char(parser, c);
push_char(parser, dest, eat_char(parser, c));
return true;
} else {
return error(parser, "illegal character `%c'\n", c);
return error(parser, "invalid character `%c'\n", c);
}
}
}
static inline uchar
echaracter_escape(SerdReader parser, const uchar esc)
static inline bool
read_echaracter_escape(SerdReader parser, Ref dest)
{
const uchar ret = character_escape(parser, esc);
if (ret) {
return ret;
if (read_character_escape(parser, dest)) {
return true;
}
switch (esc) {
switch (peek_char(parser)) {
case 't':
eat_char(parser, 't');
return '\t';
push_char(parser, dest, '\t');
return true;
case 'n':
eat_char(parser, 'n');
return '\n';
push_char(parser, dest, '\n');
return true;
case 'r':
eat_char(parser, 'r');
return '\r';
push_char(parser, dest, '\r');
return true;
default:
return 0;
return false;
}
}
// [39] echaracter ::= character | '\t' | '\n' | '\r'
static inline uchar
read_echaracter(SerdReader parser)
static inline bool
read_echaracter(SerdReader parser, Ref dest)
{
uchar c = peek_char(parser);
uchar esc;
switch (c) {
case '\\':
eat_char(parser, '\\');
esc = echaracter_escape(parser, peek_char(parser));
if (esc) {
return esc;
if (read_echaracter_escape(parser, peek_char(parser))) {
return true;
} else {
return error(parser, "illegal escape `\\%c'\n", esc);
return error(parser, "illegal escape `\\%c'\n", peek_char(parser));
}
default:
return read_character(parser);
return read_character(parser, dest);
}
}
static inline uchar
scharacter_escape(SerdReader parser, const uchar esc)
static inline bool
read_scharacter_escape(SerdReader parser, Ref dest)
{
const uchar ret = echaracter_escape(parser, esc);
if (ret) {
return ret;
} else if (esc == '"') {
return eat_char(parser, '"');
if (read_echaracter_escape(parser, dest)) {
return true;
} else if (peek_char(parser) == '"') {
push_char(parser, dest, eat_char(parser, '"'));
return true;
}
return 0;
return false;
}
static inline uchar
ucharacter_escape(SerdReader parser, const uchar esc)
static inline bool
read_ucharacter_escape(SerdReader parser, Ref dest)
{
const uchar ret = echaracter_escape(parser, esc);
if (ret) {
return ret;
} else if (esc == '>') {
return eat_char(parser, '>');
if (read_echaracter_escape(parser, dest)) {
return true;
} else if (peek_char(parser) == '>') {
push_char(parser, dest, eat_char(parser, '>'));
return true;
}
return 0;
return false;
}
// [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD
static inline uchar
read_lcharacter(SerdReader parser, bool* is_escape)
static inline bool
read_lcharacter(SerdReader parser, Ref dest)
{
*is_escape = false;
const uchar c = peek_char(parser);
uchar esc;
uchar c = peek_char(parser);
uint8_t pre[3];
switch (c) {
case '"':
readahead(parser, pre, 3);
if (pre[1] == '\"' && pre[2] == '\"') {
eat_char(parser, '\"');
eat_char(parser, '\"');
eat_char(parser, '\"');
return false;
} else {
push_char(parser, dest, eat_char(parser, '"'));
return true;
}
case '\\':
eat_char(parser, '\\');
esc = scharacter_escape(parser, peek_char(parser));
if (esc) {
*is_escape = true;
return esc;
if (read_scharacter_escape(parser, dest)) {
return true;
} else {
return error(parser, "illegal escape `\\%c'\n", esc);
return error(parser, "illegal escape `\\%c'\n", peek_char(parser));
}
case 0x9: case 0xA: case 0xD:
eat_char(parser, c);
push_char(parser, dest, eat_char(parser, c));
return c;
default:
return read_echaracter(parser);
return read_echaracter(parser, dest);
}
}
@ -506,48 +527,39 @@ static inline bool @@ -506,48 +527,39 @@ static inline bool
read_scharacter(SerdReader parser, Ref dest)
{
uchar c = peek_char(parser);
uchar esc;
switch (c) {
case '\\':
eat_char(parser, '\\');
esc = scharacter_escape(parser, peek_char(parser));
if (esc) {
push_char(parser, dest, esc);
if (read_scharacter_escape(parser, dest)) {
return true;
} else {
return error(parser, "illegal escape `\\%c'\n", esc);
return error(parser, "illegal escape `\\%c'\n", peek_char(parser));
}
case '\"':
return false;
default:
c = read_character(parser);
if (c) {
push_char(parser, dest, c);
}
return c;
return read_character(parser, dest);
}
}
// Spec: [41] ucharacter ::= ( character - #x3E ) | '\>'
// Actual: [41] ucharacter ::= ( echaracter - #x3E ) | '\>'
static inline uchar
read_ucharacter(SerdReader parser)
static inline bool
read_ucharacter(SerdReader parser, Ref dest)
{
const uchar c = peek_char(parser);
uchar esc;
uchar c = peek_char(parser);
switch (c) {
case '\\':
eat_char(parser, '\\');
esc = ucharacter_escape(parser, peek_char(parser));
if (esc) {
return esc;
if (read_ucharacter_escape(parser, dest)) {
return true;
} else {
return error(parser, "illegal escape `\\%c'\n", esc);
return error(parser, "illegal escape `\\%c'\n", peek_char(parser));
}
case '>':
return 0;
return false;
default:
return read_character(parser);
return read_character(parser, dest);
}
}
@ -607,21 +619,7 @@ read_longString(SerdReader parser) @@ -607,21 +619,7 @@ read_longString(SerdReader parser)
{
eat_string(parser, "\"\"\"", 3);
Ref str = push_string(parser, "", 1);
uchar c;
bool is_escape = false;
while ((c = read_lcharacter(parser, &is_escape)) != 0) {
if (c == '\"' && !is_escape) {
uint8_t pre[2];
readahead(parser, pre, 2);
if (pre[0] == '\"' && pre[1] == '\"') {
eat_char(parser, '\"');
eat_char(parser, '\"');
return str;
}
}
push_char(parser, str, c);
}
eat_string(parser, "\"\"\"", 3);
while (read_lcharacter(parser, str)) {}
return str;
}
@ -658,11 +656,8 @@ read_quotedString(SerdReader parser) @@ -658,11 +656,8 @@ read_quotedString(SerdReader parser)
static inline Ref
read_relativeURI(SerdReader parser)
{
uchar c;
Ref str = push_string(parser, "", 1);
while ((c = read_ucharacter(parser)) != 0) {
push_char(parser, str, c);
}
Ref str = push_string(parser, "", 1);
while (read_ucharacter(parser, str)) {}
return str;
}

84
src/serdi.c

@ -49,7 +49,7 @@ event_base(void* handle, @@ -49,7 +49,7 @@ event_base(void* handle,
assert(false);
return false;
}
base_uri_str = serd_uri_serialise(&abs_base_uri, &base_uri);
base_uri_str = serd_string_new_from_uri(&abs_base_uri, &base_uri);
// FIXME: double parse
serd_uri_parse(base_uri_str->buf, &base_uri);
} else {
@ -83,7 +83,7 @@ event_prefix(void* handle, @@ -83,7 +83,7 @@ event_prefix(void* handle,
return false;
}
SerdURI new_abs_uri;
SerdString* abs_uri_string = serd_uri_serialise(&abs_uri, &new_abs_uri);
SerdString* abs_uri_string = serd_string_new_from_uri(&abs_uri, &new_abs_uri);
serd_namespaces_add(state->ns, name, abs_uri_string);
} else {
serd_namespaces_add(state->ns, name, uri_string);
@ -91,77 +91,6 @@ event_prefix(void* handle, @@ -91,77 +91,6 @@ event_prefix(void* handle,
return true;
}
/**
 * Write one node to state->out_fd.
 *
 * - BLANK nodes are written as `_:' followed by the node string.
 * - QNAME nodes are expanded with serd_namespaces_expand() and written
 *   between angle brackets; an undefined prefix is reported on stderr.
 * - URI nodes are written between angle brackets, after being resolved
 *   against state->base_uri if the string is a relative URI.
 * - LITERAL nodes are written in double quotes with backslash, newline,
 *   carriage return, tab and double quote escaped, followed by either an
 *   unquoted `@' language tag or a `^^' datatype URI.
 *
 * @return true on success; false if a QNAME prefix is undefined, a relative
 * URI cannot be parsed or resolved, or the datatype URI cannot be written.
 */
static inline bool
write_node(State*            state,
           const SerdString* str,
           SerdNodeType      type,
           const SerdString* datatype,
           const SerdString* lang)
{
	SerdRange uri_prefix;
	SerdRange uri_suffix;
	switch (type) {
	case BLANK:
		fwrite("_:", 1, 2, state->out_fd);
		fwrite(str->buf, 1, str->n_bytes - 1, state->out_fd);
		break;
	case QNAME:
		if (!serd_namespaces_expand(state->ns, str, &uri_prefix, &uri_suffix)) {
			fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf);
			return false;
		}
		fwrite("<", 1, 1, state->out_fd);
		// NOTE(review): the `- 1' presumably drops a trailing null counted in
		// the lengths reported by serd_namespaces_expand() -- confirm
		fwrite(uri_prefix.buf, 1, uri_prefix.len - 1, state->out_fd);
		fwrite(uri_suffix.buf, 1, uri_suffix.len - 1, state->out_fd);
		fwrite(">", 1, 1, state->out_fd);
		break;
	case URI:
		if (serd_uri_string_is_relative(str->buf)) {
			SerdURI uri;
			if (serd_uri_parse(str->buf, &uri)) {
				SerdURI abs_uri;
				if (serd_uri_resolve(&uri, &state->base_uri, &abs_uri)) {
					fwrite("<", 1, 1, state->out_fd);
					serd_uri_write(&abs_uri, state->out_fd);
					fwrite(">", 1, 1, state->out_fd);
					return true;
				}
			}
		} else {
			fwrite("<", 1, 1, state->out_fd);
			fwrite(str->buf, 1, str->n_bytes - 1, state->out_fd);
			fwrite(">", 1, 1, state->out_fd);
			return true;
		}
		return false;
	case LITERAL:
		fwrite("\"", 1, 1, state->out_fd);
		for (size_t i = 0; i < str->n_bytes - 1; ++i) {
			const char c = str->buf[i];
			switch (c) {
			case '\\': fwrite("\\\\", 1, 2, state->out_fd); break;
			case '\n': fwrite("\\n", 1, 2, state->out_fd);  break;
			case '\r': fwrite("\\r", 1, 2, state->out_fd);  break;
			case '\t': fwrite("\\t", 1, 2, state->out_fd);  break;
			case '"':  fwrite("\\\"", 1, 2, state->out_fd); break;
			default:
				fwrite(&c, 1, 1, state->out_fd);
			}
		}
		fwrite("\"", 1, 1, state->out_fd);
		if (lang) {
			// N-Triples language tags follow `@' directly and are not quoted
			fwrite("@", 1, 1, state->out_fd);
			fwrite(lang->buf, 1, lang->n_bytes - 1, state->out_fd);
		} else if (datatype) {
			fwrite("^^", 1, 2, state->out_fd);
			// Propagate failure to write the datatype URI to the caller
			return write_node(state, datatype, URI, NULL, NULL);
		}
		break;
	}
	return true;
}
static bool
event_statement(void* handle,
const SerdString* graph,
@ -176,11 +105,14 @@ event_statement(void* handle, @@ -176,11 +105,14 @@ event_statement(void* handle,
{
State* const state = (State*)handle;
FILE* const fd = state->out_fd;
write_node(state, subject, subject_type, NULL, NULL);
serd_write_node(fd, &state->base_uri, state->ns,
subject_type, subject, NULL, NULL);
fwrite(" ", 1, 1, fd);
write_node(state, predicate, predicate_type, NULL, NULL);
serd_write_node(fd, &state->base_uri, state->ns,
predicate_type, predicate, NULL, NULL);
fwrite(" ", 1, 1, fd);
write_node(state, object, object_type, object_datatype, object_lang);
serd_write_node(fd, &state->base_uri, state->ns,
object_type, object, object_datatype, object_lang);
fwrite(" .\n", 1, 3, fd);
return true;
}

65
src/string.c

@ -0,0 +1,65 @@ @@ -0,0 +1,65 @@
/* Serd, an RDF serialisation library.
* Copyright 2011 David Robillard <d@drobilla.net>
*
* Serd is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Serd is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include "serd/serd.h"
/**
 * Count the characters in the null-terminated UTF-8 string @a utf8.
 *
 * Each byte that is not a continuation byte (top two bits `10') is counted
 * as the start of one character.  If @a out_n_bytes is non-NULL, it is set to
 * the size of the string in bytes, including the terminating null byte.
 *
 * @return The number of characters counted.
 */
static inline size_t
utf8_strlen(const uint8_t* utf8, size_t* out_n_bytes)
{
	const uint8_t* cursor = utf8;
	size_t         count  = 0;
	while (*cursor != 0) {
		// Top two bits `10' mark a continuation byte, which is not counted
		if ((*cursor >> 6) != 0x2) {
			++count;
		}
		++cursor;
	}
	if (out_n_bytes) {
		// Bytes walked, plus one for the terminating null
		*out_n_bytes = (size_t)(cursor - utf8) + 1;
	}
	return count;
}
/**
 * Create a new measured UTF-8 string from the null-terminated @a utf8.
 *
 * The bytes of @a utf8, including the terminating null byte, are copied into
 * a single block obtained from malloc() that also holds the byte and
 * character counts.
 *
 * @return The new string, or NULL if memory could not be allocated.
 */
SERD_API
SerdString*
serd_string_new(const uint8_t* utf8)
{
	size_t       n_bytes = 0;
	const size_t n_chars = utf8_strlen(utf8, &n_bytes);

	SerdString* const str = malloc(sizeof(SerdString) + n_bytes);
	if (!str) {
		// Out of memory: report failure instead of writing through NULL
		return NULL;
	}

	str->n_bytes = n_bytes;
	str->n_chars = n_chars;
	memcpy(str->buf, utf8, n_bytes);
	return str;
}
/**
 * Copy @a s, including its byte/character counts and buffer contents.
 *
 * @return A newly malloc()ed copy, or NULL if @a s is NULL or memory could
 * not be allocated.
 */
SERD_API
SerdString*
serd_string_copy(const SerdString* s)
{
	if (!s) {
		return NULL;
	}

	// Header and trailing buffer live in one block, so one copy suffices
	const size_t total = sizeof(SerdString) + s->n_bytes;
	SerdString* const copy = malloc(total);
	if (copy) {
		memcpy(copy, s, total);
	}
	return copy;  // NULL here means the allocation failed
}

49
src/uri.c

@ -260,10 +260,9 @@ serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t) @@ -260,10 +260,9 @@ serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t)
return true;
}
typedef size_t (*Sink)(const void* data, size_t size, size_t nmemb, void* stream);
static size_t
serd_uri_serialise_internal(const SerdURI* uri, Sink sink, void* stream)
SERD_API
size_t
serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream)
{
/* See http://tools.ietf.org/html/rfc3986#section-5.3 */
@ -271,16 +270,16 @@ serd_uri_serialise_internal(const SerdURI* uri, Sink sink, void* stream) @@ -271,16 +270,16 @@ serd_uri_serialise_internal(const SerdURI* uri, Sink sink, void* stream)
#define WRITE(buf, len) \
write_size += len; \
if (len) { \
sink(buf, 1, len, stream); \
sink((const uint8_t*)buf, len, stream); \
}
#define WRITE_CHAR(c) WRITE(&(c), 1)
#define WRITE_COMPONENT(prefix, field, suffix) \
if ((field).len) { \
for (const char* c = prefix; *c != '\0'; ++c) { \
for (const uint8_t* c = (const uint8_t*)prefix; *c != '\0'; ++c) { \
WRITE(c, 1); \
} \
WRITE((field).buf, (field).len); \
for (const char* c = suffix; *c != '\0'; ++c) { \
for (const uint8_t* c = (const uint8_t*)suffix; *c != '\0'; ++c) { \
WRITE(c, 1); \
} \
}
@ -354,26 +353,9 @@ serd_uri_serialise_internal(const SerdURI* uri, Sink sink, void* stream) @@ -354,26 +353,9 @@ serd_uri_serialise_internal(const SerdURI* uri, Sink sink, void* stream)
// Note uri->fragment.buf includes the leading `#'
WRITE_COMPONENT("", uri->fragment, "");
}
WRITE("\0", 1);
return write_size;
}
SERD_API
bool
serd_uri_write(const SerdURI* uri, FILE* file)
{
//#if 0
SerdURI flat_uri;
SerdString* const flat_uri_str = serd_uri_serialise(uri, &flat_uri);
if (flat_uri_str) {
fwrite(flat_uri_str->buf, 1, flat_uri_str->n_bytes - 1, file);
free(flat_uri_str);
return true;
}
return false;
//#endif
//return (serd_uri_serialise_internal(uri, (Sink)fwrite, file) > 0);
}
static size_t
serd_uri_string_length(const SerdURI* uri)
@ -393,18 +375,17 @@ serd_uri_string_length(const SerdURI* uri) @@ -393,18 +375,17 @@ serd_uri_string_length(const SerdURI* uri)
}
static size_t
string_write(const void* data, size_t size, size_t nmemb, void* stream)
string_sink(const uint8_t* buf, size_t len, void* stream)
{
uint8_t** ptr = (uint8_t**)stream;
const size_t write_size = (size * nmemb);
memcpy(*ptr, data, write_size);
*ptr += write_size;
return nmemb;
uint8_t** ptr = (uint8_t**)stream;
memcpy(*ptr, buf, len);
*ptr += len;
return len;
}
SERD_API
SerdString*
serd_uri_serialise(const SerdURI* uri, SerdURI* out)
serd_string_new_from_uri(const SerdURI* uri, SerdURI* out)
{
const size_t len = serd_uri_string_length(uri);
SerdString* str = malloc(sizeof(SerdString) + len + 1);
@ -412,10 +393,10 @@ serd_uri_serialise(const SerdURI* uri, SerdURI* out) @@ -412,10 +393,10 @@ serd_uri_serialise(const SerdURI* uri, SerdURI* out)
str->n_chars = len; // FIXME: UTF-8
uint8_t* ptr = str->buf;
const size_t actual_len = serd_uri_serialise_internal(uri, string_write, &ptr);
const size_t actual_len = serd_uri_serialise(uri, string_sink, &ptr);
str->buf[actual_len] = '\0';
str->n_bytes = actual_len;
str->buf[actual_len + 1] = '\0';
str->n_bytes = actual_len + 1;
str->n_chars = str->n_bytes - 1;
#ifdef URI_DEBUG

170
src/write.c

@ -0,0 +1,170 @@ @@ -0,0 +1,170 @@
/* Serd, an RDF serialisation library.
* Copyright 2011 David Robillard <d@drobilla.net>
*
* Serd is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Serd is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <assert.h>
#include <stdlib.h>
#include "serd/serd.h"
/**
 * Sink that writes @a len bytes from @a buf to the FILE* passed as @a stream.
 *
 * @return The number of bytes written, as reported by fwrite().
 */
static size_t
file_sink(const uint8_t* buf, size_t len, void* stream)
{
	return fwrite(buf, sizeof(uint8_t), len, (FILE*)stream);
}
/**
 * Serialise @a uri to @a file by passing file_sink to serd_uri_serialise().
 *
 * @return true iff serd_uri_serialise() returned a non-zero size.
 */
static inline bool
serd_write_uri(FILE* file, const SerdURI* uri)
{
	const size_t n_written = serd_uri_serialise(uri, file_sink, file);
	return n_written != 0;
}
/**
 * Write @a n_bytes of UTF-8 from @a utf8 to @a out_fd as escaped ASCII.
 *
 * Backslash, newline, carriage return and tab are written as two-character
 * backslash escapes.  A double quote is written as a backslash escape when
 * @a esc is a double quote; any other byte equal to @a esc is written as a
 * four-digit `u' escape.  Printable ASCII (0x20 to 0x7E) is written as-is.
 * Every other character is decoded from UTF-8 and written as a four-digit
 * `u' escape (code points up to 0xFFFF) or an eight-digit `U' escape.
 *
 * @return true on success; false if the input contains an invalid lead byte
 * or ends in the middle of a multi-byte sequence.  Output written before the
 * error is left in place and a message is printed to stderr.
 */
static bool
serd_write_ascii(const uint8_t* utf8, size_t n_bytes, FILE* out_fd, const uint8_t esc)
{
	for (size_t i = 0; i < n_bytes;) {
		const size_t  start = i;  // Offset of this character, for diagnostics
		const uint8_t in    = utf8[i++];

		switch (in) {
		case '\\': fwrite("\\\\", 1, 2, out_fd); continue;
		case '\n': fwrite("\\n", 1, 2, out_fd);  continue;
		case '\r': fwrite("\\r", 1, 2, out_fd);  continue;
		case '\t': fwrite("\\t", 1, 2, out_fd);  continue;
		case '"':
			if (esc == '"') {
				fwrite("\\\"", 1, 2, out_fd);
				continue;
			}
			break;  // Not the delimiter here: handled like any other byte
		default:
			break;
		}

		if (in == esc) {
			fprintf(out_fd, "\\u%04X", (unsigned)esc);
			continue;
		}

		// Determine sequence length and payload bits from the lead byte
		uint32_t c    = 0;
		unsigned size = 0;
		if ((in & 0x80) == 0) {  // Starts with `0'
			if ((in >= 0x20) && (in <= 0x7E)) {  // Printable ASCII
				fwrite(&in, 1, 1, out_fd);
				continue;
			}
			size = 1;
			c    = in;
		} else if ((in & 0xE0) == 0xC0) {  // Starts with `110'
			size = 2;
			c    = in & 0x1F;
		} else if ((in & 0xF0) == 0xE0) {  // Starts with `1110'
			size = 3;
			c    = in & 0x0F;
		} else if ((in & 0xF8) == 0xF0) {  // Starts with `11110'
			size = 4;
			c    = in & 0x07;
		} else if ((in & 0xFC) == 0xF8) {  // Starts with `111110'
			size = 5;
			c    = in & 0x03;
		} else if ((in & 0xFE) == 0xFC) {  // Starts with `1111110'
			size = 6;
			c    = in & 0x01;
		} else {
			fprintf(stderr, "invalid UTF-8 at offset %zu: %X\n",
			        start, (unsigned)in);
			return false;
		}

		// Fold in the low six bits of each remaining byte of the sequence
		for (unsigned k = 1; k < size; ++k) {
			if (i >= n_bytes) {
				// Input ends mid-character: fail rather than read past the end
				fprintf(stderr, "truncated UTF-8 at offset %zu\n", start);
				return false;
			}
			c = (c << 6) | (uint32_t)(utf8[i++] & 0x3F);
		}

		if (c <= 0xFFFF) {
			fprintf(out_fd, "\\u%04X", (unsigned)c);
		} else {
			fprintf(out_fd, "\\U%08X", (unsigned)c);
		}
	}
	return true;
}
/**
 * Write one node to @a fd as escaped ASCII.
 *
 * - BLANK nodes are written as `_:' followed by the node string.
 * - QNAME nodes are expanded with @a ns and written between angle brackets;
 *   an undefined prefix is reported on stderr.
 * - URI nodes are written between angle brackets, after being resolved
 *   against @a base_uri if the string is a relative URI.
 * - LITERAL nodes are written in double quotes, followed by either an
 *   unquoted `@' language tag (if @a lang is non-NULL) or `^^' and the
 *   @a datatype URI (if @a datatype is non-NULL).
 *
 * @return true on success; false if a QNAME prefix is undefined, a relative
 * URI cannot be parsed or resolved, or a string could not be escaped by
 * serd_write_ascii().  Output written before a failure is left in place.
 */
SERD_API
bool
serd_write_node(FILE*             fd,
                const SerdURI*    base_uri,
                SerdNamespaces    ns,
                SerdNodeType      type,
                const SerdString* str,
                const SerdString* datatype,
                const SerdString* lang)
{
	SerdRange uri_prefix;
	SerdRange uri_suffix;
	switch (type) {
	case BLANK:
		fwrite("_:", 1, 2, fd);
		fwrite(str->buf, 1, str->n_bytes - 1, fd);
		break;
	case QNAME:
		if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) {
			fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf);
			return false;
		}
		fwrite("<", 1, 1, fd);
		if (!serd_write_ascii(uri_prefix.buf, uri_prefix.len, fd, '>') ||
		    !serd_write_ascii(uri_suffix.buf, uri_suffix.len, fd, '>')) {
			return false;  // Escaping failed; do not report success
		}
		fwrite(">", 1, 1, fd);
		break;
	case URI:
		if (serd_uri_string_is_relative(str->buf)) {
			SerdURI uri;
			if (serd_uri_parse(str->buf, &uri)) {
				SerdURI abs_uri;
				if (serd_uri_resolve(&uri, base_uri, &abs_uri)) {
					fwrite("<", 1, 1, fd);
					serd_write_uri(fd, &abs_uri);
					fwrite(">", 1, 1, fd);
					return true;
				}
			}
		} else {
			fwrite("<", 1, 1, fd);
			if (!serd_write_ascii(str->buf, str->n_bytes - 1, fd, '>')) {
				return false;
			}
			fwrite(">", 1, 1, fd);
			return true;
		}
		return false;
	case LITERAL:
		fwrite("\"", 1, 1, fd);
		if (!serd_write_ascii(str->buf, str->n_bytes - 1, fd, '"')) {
			return false;
		}
		fwrite("\"", 1, 1, fd);
		if (lang) {
			// N-Triples language tags follow `@' directly and are not quoted
			fwrite("@", 1, 1, fd);
			fwrite(lang->buf, 1, lang->n_bytes - 1, fd);
		} else if (datatype) {
			fwrite("^^", 1, 2, fd);
			// Propagate failure to write the datatype URI to the caller
			return serd_write_node(fd, base_uri, ns, URI, datatype, NULL, NULL);
		}
		break;
	}
	return true;
}

8
wscript

@ -54,7 +54,13 @@ def build(bld): @@ -54,7 +54,13 @@ def build(bld):
# Pkgconfig file
autowaf.build_pc(bld, 'SERD', SERD_VERSION, ['REDLAND'])
lib_source = 'src/reader.c src/namespaces.c src/uri.c'
lib_source = '''
src/namespaces.c
src/reader.c
src/string.c
src/uri.c
src/write.c
'''
# Library
obj = bld(features = 'c cshlib')

Loading…
Cancel
Save