You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1114 lines
30 KiB
1114 lines
30 KiB
/*
  Copyright 2011-2020 David Robillard <http://drobilla.net>

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
|
|
|
#include "byte_sink.h" |
|
#include "serd_internal.h" |
|
#include "stack.h" |
|
#include "string_utils.h" |
|
#include "uri_utils.h" |
|
|
|
#include "serd/serd.h" |
|
|
|
#include <assert.h> |
|
#include <stdarg.h> |
|
#include <stdbool.h> |
|
#include <stdint.h> |
|
#include <stdio.h> |
|
#include <stdlib.h> |
|
#include <string.h> |
|
|
|
/// Position of a node within the statement being written
typedef enum {
  FIELD_NONE = 0,  ///< Not a statement field (used for a literal's datatype)
  FIELD_SUBJECT,   ///< Subject of the statement
  FIELD_PREDICATE, ///< Predicate of the statement
  FIELD_OBJECT,    ///< Object of the statement
  FIELD_GRAPH      ///< Graph of the statement
} Field;
|
|
|
typedef struct { |
|
SerdNode graph; |
|
SerdNode subject; |
|
SerdNode predicate; |
|
} WriteContext; |
|
|
|
/// A context in which no graph, subject, or predicate is set
static const WriteContext WRITE_CONTEXT_NULL = {
  {0, 0, 0, 0, SERD_NOTHING}, // graph
  {0, 0, 0, 0, SERD_NOTHING}, // subject
  {0, 0, 0, 0, SERD_NOTHING}  // predicate
};
|
|
|
/// Separator kinds; each value is used as an index into the `rules` table
typedef enum {
  SEP_NONE = 0,    ///< No separator (also the state after writing a node)
  SEP_END_S,       ///< End of a subject ('.')
  SEP_END_P,       ///< End of a predicate (';')
  SEP_END_O,       ///< End of an object (',')
  SEP_S_P,         ///< Between a subject and predicate (whitespace)
  SEP_P_O,         ///< Between a predicate and object (whitespace)
  SEP_ANON_BEGIN,  ///< Start of anonymous node ('[')
  SEP_ANON_END,    ///< End of anonymous node (']')
  SEP_LIST_BEGIN,  ///< Start of list ('(')
  SEP_LIST_SEP,    ///< List separator (whitespace)
  SEP_LIST_END,    ///< End of list (')')
  SEP_GRAPH_BEGIN, ///< Start of graph ('{')
  SEP_GRAPH_END,   ///< End of graph ('}')
  SEP_URI_BEGIN,   ///< URI start quote ('<')
  SEP_URI_END      ///< URI end quote ('>')
} Sep;
|
|
|
/// How a single separator is rendered (consumed by write_sep())
typedef struct {
  const char* str;              ///< Separator text, or NULL to write nothing
  uint8_t     len;              ///< Length of `str` in bytes
  uint8_t     space_before;     ///< Nonzero: newline before the separator
  uint8_t     space_after_node; ///< Nonzero: newline after, if last thing written was a node
  uint8_t     space_after_sep;  ///< Nonzero: newline after, if last thing written was a separator
} SepRule;
|
|
|
static const SepRule rules[] = {{NULL, 0, 0, 0, 0}, |
|
{" .\n\n", 4, 0, 0, 0}, |
|
{" ;", 2, 0, 1, 1}, |
|
{" ,", 2, 0, 1, 0}, |
|
{NULL, 0, 0, 1, 0}, |
|
{" ", 1, 0, 0, 0}, |
|
{"[", 1, 0, 1, 1}, |
|
{"]", 1, 1, 0, 0}, |
|
{"(", 1, 0, 0, 0}, |
|
{NULL, 0, 0, 1, 0}, |
|
{")", 1, 1, 0, 0}, |
|
{" {", 2, 0, 1, 1}, |
|
{" }", 2, 0, 1, 1}, |
|
{"<", 1, 0, 0, 0}, |
|
{">", 1, 0, 0, 0}, |
|
{"\n", 1, 0, 1, 0}}; |
|
|
|
struct SerdWriterImpl { |
|
SerdSyntax syntax; |
|
SerdStyle style; |
|
SerdEnv* env; |
|
SerdNode root_node; |
|
SerdURI root_uri; |
|
SerdURI base_uri; |
|
SerdStack anon_stack; |
|
SerdByteSink byte_sink; |
|
SerdErrorSink error_sink; |
|
void* error_handle; |
|
WriteContext context; |
|
SerdNode list_subj; |
|
unsigned list_depth; |
|
unsigned indent; |
|
uint8_t* bprefix; |
|
size_t bprefix_len; |
|
Sep last_sep; |
|
bool empty; |
|
}; |
|
|
|
/// Kind of quoted string that write_text() is escaping for
typedef enum {
  WRITE_STRING = 0,  ///< Single-quoted string ("...")
  WRITE_LONG_STRING  ///< Triple-quoted long string ("""...""")
} TextContext;
|
|
|
static bool |
|
write_node(SerdWriter* writer, |
|
const SerdNode* node, |
|
const SerdNode* datatype, |
|
const SerdNode* lang, |
|
Field field, |
|
SerdStatementFlags flags); |
|
|
|
static bool |
|
supports_abbrev(const SerdWriter* writer) |
|
{ |
|
return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG; |
|
} |
|
|
|
static bool |
|
supports_uriref(const SerdWriter* writer) |
|
{ |
|
return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG; |
|
} |
|
|
|
static void |
|
w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...) |
|
{ |
|
/* TODO: This results in errors with no file information, which is not |
|
helpful when re-serializing a file (particularly for "undefined |
|
namespace prefix" errors. The statement sink API needs to be changed to |
|
add a Cursor parameter so the source can notify the writer of the |
|
statement origin for better error reporting. */ |
|
|
|
va_list args; |
|
va_start(args, fmt); |
|
const SerdError e = {st, (const uint8_t*)"", 0, 0, fmt, &args}; |
|
serd_error(writer->error_sink, writer->error_handle, &e); |
|
va_end(args); |
|
} |
|
|
|
static inline WriteContext* |
|
anon_stack_top(SerdWriter* writer) |
|
{ |
|
assert(!serd_stack_is_empty(&writer->anon_stack)); |
|
return (WriteContext*)(writer->anon_stack.buf + writer->anon_stack.size - |
|
sizeof(WriteContext)); |
|
} |
|
|
|
static void |
|
copy_node(SerdNode* dst, const SerdNode* src) |
|
{ |
|
if (src) { |
|
dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1); |
|
dst->n_bytes = src->n_bytes; |
|
dst->n_chars = src->n_chars; |
|
dst->flags = src->flags; |
|
dst->type = src->type; |
|
memcpy((char*)dst->buf, src->buf, src->n_bytes + 1); |
|
} else { |
|
dst->type = SERD_NOTHING; |
|
} |
|
} |
|
|
|
static inline size_t |
|
sink(const void* buf, size_t len, SerdWriter* writer) |
|
{ |
|
return serd_byte_sink_write(buf, len, &writer->byte_sink); |
|
} |
|
|
|
// Write a single character, as an escape for single byte characters |
|
// (Caller prints any single byte characters that don't need escaping) |
|
static size_t |
|
write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size) |
|
{ |
|
char escape[11] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; |
|
const uint32_t c = parse_utf8_char(utf8, size); |
|
switch (*size) { |
|
case 0: |
|
w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]); |
|
return sink(replacement_char, sizeof(replacement_char), writer); |
|
case 1: |
|
snprintf(escape, sizeof(escape), "\\u%04X", utf8[0]); |
|
return sink(escape, 6, writer); |
|
default: |
|
break; |
|
} |
|
|
|
if (!(writer->style & SERD_STYLE_ASCII)) { |
|
// Write UTF-8 character directly to UTF-8 output |
|
return sink(utf8, *size, writer); |
|
} |
|
|
|
if (c <= 0xFFFF) { |
|
snprintf(escape, sizeof(escape), "\\u%04X", c); |
|
return sink(escape, 6, writer); |
|
} |
|
|
|
snprintf(escape, sizeof(escape), "\\U%08X", c); |
|
return sink(escape, 10, writer); |
|
} |
|
|
|
/// Return true iff byte `c` can not be written verbatim inside a URI
static inline bool
uri_must_escape(const uint8_t c)
{
  // Anything outside of printable ASCII is always escaped
  if (!in_range(c, 0x20, 0x7E)) {
    return true;
  }

  // Printable ASCII characters that are not allowed in a URI
  return strchr(" \"<>\\^`{|}", c) != NULL;
}
|
|
|
static size_t |
|
write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) |
|
{ |
|
size_t len = 0; |
|
for (size_t i = 0; i < n_bytes;) { |
|
size_t j = i; // Index of next character that must be escaped |
|
for (; j < n_bytes; ++j) { |
|
if (uri_must_escape(utf8[j])) { |
|
break; |
|
} |
|
} |
|
|
|
// Bulk write all characters up to this special one |
|
len += sink(&utf8[i], j - i, writer); |
|
if ((i = j) == n_bytes) { |
|
break; // Reached end |
|
} |
|
|
|
// Write UTF-8 character |
|
size_t size = 0; |
|
len += write_character(writer, utf8 + i, &size); |
|
i += size; |
|
if (size == 0) { |
|
// Corrupt input, scan to start of next character |
|
for (++i; i < n_bytes && (utf8[i] & 0x80); ++i) { |
|
} |
|
} |
|
} |
|
|
|
return len; |
|
} |
|
|
|
/// Return true iff byte `c` must be backslash-escaped in a local name
static bool
lname_must_escape(const uint8_t c)
{
  /* These are the characters of PN_LOCAL_ESC that are written escaped.

     Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped
     in local names, so they are not escaped here. */
  static const char escaped[] = "'!#$%&()*+,/;=?@~";

  // strchr() would match the terminator, so rule out zero explicitly
  return c != '\0' && strchr(escaped, c) != NULL;
}
|
|
|
static size_t |
|
write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) |
|
{ |
|
size_t len = 0; |
|
for (size_t i = 0; i < n_bytes; ++i) { |
|
size_t j = i; // Index of next character that must be escaped |
|
for (; j < n_bytes; ++j) { |
|
if (lname_must_escape(utf8[j])) { |
|
break; |
|
} |
|
} |
|
|
|
// Bulk write all characters up to this special one |
|
len += sink(&utf8[i], j - i, writer); |
|
if ((i = j) == n_bytes) { |
|
break; // Reached end |
|
} |
|
|
|
// Write escape |
|
len += sink("\\", 1, writer); |
|
len += sink(&utf8[i], 1, writer); |
|
} |
|
|
|
return len; |
|
} |
|
|
|
static size_t |
|
write_text(SerdWriter* writer, |
|
TextContext ctx, |
|
const uint8_t* utf8, |
|
size_t n_bytes) |
|
{ |
|
size_t len = 0; |
|
for (size_t i = 0; i < n_bytes;) { |
|
// Fast bulk write for long strings of printable ASCII |
|
size_t j = i; |
|
for (; j < n_bytes; ++j) { |
|
if (utf8[j] == '\\' || utf8[j] == '"' || |
|
(!in_range(utf8[j], 0x20, 0x7E))) { |
|
break; |
|
} |
|
} |
|
|
|
len += sink(&utf8[i], j - i, writer); |
|
if ((i = j) == n_bytes) { |
|
break; // Reached end |
|
} |
|
|
|
const uint8_t in = utf8[i++]; |
|
if (ctx == WRITE_LONG_STRING) { |
|
switch (in) { |
|
case '\\': |
|
len += sink("\\\\", 2, writer); |
|
continue; |
|
case '\b': |
|
len += sink("\\b", 2, writer); |
|
continue; |
|
case '\n': |
|
case '\r': |
|
case '\t': |
|
case '\f': |
|
len += sink(&in, 1, writer); // Write character as-is |
|
continue; |
|
case '\"': |
|
if (i == n_bytes) { // '"' at string end |
|
len += sink("\\\"", 2, writer); |
|
} else { |
|
len += sink(&in, 1, writer); |
|
} |
|
continue; |
|
default: |
|
break; |
|
} |
|
} else if (ctx == WRITE_STRING) { |
|
switch (in) { |
|
case '\\': |
|
len += sink("\\\\", 2, writer); |
|
continue; |
|
case '\n': |
|
len += sink("\\n", 2, writer); |
|
continue; |
|
case '\r': |
|
len += sink("\\r", 2, writer); |
|
continue; |
|
case '\t': |
|
len += sink("\\t", 2, writer); |
|
continue; |
|
case '"': |
|
len += sink("\\\"", 2, writer); |
|
continue; |
|
default: |
|
break; |
|
} |
|
if (writer->syntax == SERD_TURTLE) { |
|
switch (in) { |
|
case '\b': |
|
len += sink("\\b", 2, writer); |
|
continue; |
|
case '\f': |
|
len += sink("\\f", 2, writer); |
|
continue; |
|
default: |
|
break; |
|
} |
|
} |
|
} |
|
|
|
// Write UTF-8 character |
|
size_t size = 0; |
|
len += write_character(writer, utf8 + i - 1, &size); |
|
if (size == 0) { |
|
// Corrupt input, scan to start of next character |
|
for (; i < n_bytes && (utf8[i] & 0x80); ++i) { |
|
} |
|
} else { |
|
i += size - 1; |
|
} |
|
} |
|
|
|
return len; |
|
} |
|
|
|
static size_t |
|
uri_sink(const void* buf, size_t len, void* stream) |
|
{ |
|
return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len); |
|
} |
|
|
|
static void |
|
write_newline(SerdWriter* writer) |
|
{ |
|
sink("\n", 1, writer); |
|
for (unsigned i = 0; i < writer->indent; ++i) { |
|
sink("\t", 1, writer); |
|
} |
|
} |
|
|
|
static bool |
|
write_sep(SerdWriter* writer, const Sep sep) |
|
{ |
|
const SepRule* rule = &rules[sep]; |
|
if (rule->space_before) { |
|
write_newline(writer); |
|
} |
|
|
|
if (rule->str) { |
|
sink(rule->str, rule->len, writer); |
|
} |
|
|
|
if ((writer->last_sep && rule->space_after_sep) || |
|
(!writer->last_sep && rule->space_after_node)) { |
|
write_newline(writer); |
|
} else if (writer->last_sep && rule->space_after_node) { |
|
sink(" ", 1, writer); |
|
} |
|
|
|
writer->last_sep = sep; |
|
return true; |
|
} |
|
|
|
static SerdStatus |
|
reset_context(SerdWriter* writer, bool graph) |
|
{ |
|
if (graph) { |
|
writer->context.graph.type = SERD_NOTHING; |
|
} |
|
|
|
writer->context.subject.type = SERD_NOTHING; |
|
writer->context.predicate.type = SERD_NOTHING; |
|
writer->empty = false; |
|
return SERD_SUCCESS; |
|
} |
|
|
|
static SerdStatus |
|
free_context(SerdWriter* writer) |
|
{ |
|
serd_node_free(&writer->context.graph); |
|
serd_node_free(&writer->context.subject); |
|
serd_node_free(&writer->context.predicate); |
|
return reset_context(writer, true); |
|
} |
|
|
|
static bool |
|
is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags) |
|
{ |
|
return (supports_abbrev(writer) && |
|
((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) || |
|
(field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN)))); |
|
} |
|
|
|
static bool |
|
write_literal(SerdWriter* writer, |
|
const SerdNode* node, |
|
const SerdNode* datatype, |
|
const SerdNode* lang, |
|
SerdStatementFlags flags) |
|
{ |
|
if (supports_abbrev(writer) && datatype && datatype->buf) { |
|
const char* type_uri = (const char*)datatype->buf; |
|
if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && |
|
(!strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") || |
|
!strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) { |
|
sink(node->buf, node->n_bytes, writer); |
|
return true; |
|
} |
|
|
|
if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && |
|
!strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") && |
|
strchr((const char*)node->buf, '.') && |
|
node->buf[node->n_bytes - 1] != '.') { |
|
/* xsd:decimal literals without trailing digits, e.g. "5.", can |
|
not be written bare in Turtle. We could add a 0 which is |
|
prettier, but changes the text and breaks round tripping. |
|
*/ |
|
sink(node->buf, node->n_bytes, writer); |
|
return true; |
|
} |
|
} |
|
|
|
if (supports_abbrev(writer) && |
|
(node->flags & (SERD_HAS_NEWLINE | SERD_HAS_QUOTE))) { |
|
sink("\"\"\"", 3, writer); |
|
write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes); |
|
sink("\"\"\"", 3, writer); |
|
} else { |
|
sink("\"", 1, writer); |
|
write_text(writer, WRITE_STRING, node->buf, node->n_bytes); |
|
sink("\"", 1, writer); |
|
} |
|
if (lang && lang->buf) { |
|
sink("@", 1, writer); |
|
sink(lang->buf, lang->n_bytes, writer); |
|
} else if (datatype && datatype->buf) { |
|
sink("^^", 2, writer); |
|
return write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags); |
|
} |
|
return true; |
|
} |
|
|
|
// Return true iff the `len` bytes at `buf` can be written as the suffix of a
// prefixed name (which is trivially the case if `len` is zero)
static inline bool
is_name(const uint8_t* buf, const size_t len)
{
  // TODO: This is more strict than it should be
  size_t i = 0;
  while (i < len) {
    const uint8_t c = buf[i++];
    if (!is_alpha(c) && !is_digit(c)) {
      return false;
    }
  }

  return true;
}
|
|
|
static bool |
|
write_uri_node(SerdWriter* const writer, |
|
const SerdNode* node, |
|
const Field field, |
|
const SerdStatementFlags flags) |
|
{ |
|
SerdNode prefix; |
|
SerdChunk suffix; |
|
|
|
if (is_inline_start(writer, field, flags)) { |
|
++writer->indent; |
|
write_sep(writer, SEP_ANON_BEGIN); |
|
sink("== ", 3, writer); |
|
} |
|
|
|
const bool has_scheme = serd_uri_string_has_scheme(node->buf); |
|
if (supports_abbrev(writer)) { |
|
if (field == FIELD_PREDICATE && |
|
!strcmp((const char*)node->buf, NS_RDF "type")) { |
|
return sink("a", 1, writer) == 1; |
|
} |
|
|
|
if (!strcmp((const char*)node->buf, NS_RDF "nil")) { |
|
return sink("()", 2, writer) == 2; |
|
} |
|
|
|
if (has_scheme && (writer->style & SERD_STYLE_CURIED) && |
|
serd_env_qualify(writer->env, node, &prefix, &suffix) && |
|
is_name(suffix.buf, suffix.len)) { |
|
write_uri(writer, prefix.buf, prefix.n_bytes); |
|
sink(":", 1, writer); |
|
write_uri(writer, suffix.buf, suffix.len); |
|
return true; |
|
} |
|
} |
|
|
|
if (!has_scheme && !supports_uriref(writer) && |
|
!serd_env_get_base_uri(writer->env, NULL)->buf) { |
|
w_err(writer, |
|
SERD_ERR_BAD_ARG, |
|
"syntax does not support URI reference <%s>\n", |
|
node->buf); |
|
return false; |
|
} |
|
|
|
write_sep(writer, SEP_URI_BEGIN); |
|
if (writer->style & SERD_STYLE_RESOLVED) { |
|
SerdURI in_base_uri; |
|
SerdURI uri; |
|
SerdURI abs_uri; |
|
serd_env_get_base_uri(writer->env, &in_base_uri); |
|
serd_uri_parse(node->buf, &uri); |
|
serd_uri_resolve(&uri, &in_base_uri, &abs_uri); |
|
bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri); |
|
SerdURI* root = rooted ? &writer->root_uri : &writer->base_uri; |
|
if (!uri_is_under(&abs_uri, root) || writer->syntax == SERD_NTRIPLES || |
|
writer->syntax == SERD_NQUADS) { |
|
serd_uri_serialise(&abs_uri, uri_sink, writer); |
|
} else { |
|
serd_uri_serialise_relative( |
|
&uri, &writer->base_uri, root, uri_sink, writer); |
|
} |
|
} else { |
|
write_uri(writer, node->buf, node->n_bytes); |
|
} |
|
|
|
write_sep(writer, SEP_URI_END); |
|
if (is_inline_start(writer, field, flags)) { |
|
sink(" ;", 2, writer); |
|
write_newline(writer); |
|
} |
|
|
|
return true; |
|
} |
|
|
|
static bool |
|
write_curie(SerdWriter* const writer, |
|
const SerdNode* node, |
|
const Field field, |
|
const SerdStatementFlags flags) |
|
{ |
|
SerdChunk prefix = {NULL, 0}; |
|
SerdChunk suffix = {NULL, 0}; |
|
SerdStatus st = SERD_SUCCESS; |
|
|
|
switch (writer->syntax) { |
|
case SERD_NTRIPLES: |
|
case SERD_NQUADS: |
|
if ((st = serd_env_expand(writer->env, node, &prefix, &suffix))) { |
|
w_err(writer, st, "undefined namespace prefix `%s'\n", node->buf); |
|
return false; |
|
} |
|
write_sep(writer, SEP_URI_BEGIN); |
|
write_uri(writer, prefix.buf, prefix.len); |
|
write_uri(writer, suffix.buf, suffix.len); |
|
write_sep(writer, SEP_URI_END); |
|
break; |
|
case SERD_TURTLE: |
|
case SERD_TRIG: |
|
if (is_inline_start(writer, field, flags)) { |
|
++writer->indent; |
|
write_sep(writer, SEP_ANON_BEGIN); |
|
sink("== ", 3, writer); |
|
} |
|
write_lname(writer, node->buf, node->n_bytes); |
|
if (is_inline_start(writer, field, flags)) { |
|
sink(" ;", 2, writer); |
|
write_newline(writer); |
|
} |
|
} |
|
|
|
return true; |
|
} |
|
|
|
static bool |
|
write_blank(SerdWriter* const writer, |
|
const SerdNode* node, |
|
const Field field, |
|
const SerdStatementFlags flags) |
|
{ |
|
if (supports_abbrev(writer)) { |
|
if (is_inline_start(writer, field, flags)) { |
|
++writer->indent; |
|
return write_sep(writer, SEP_ANON_BEGIN); |
|
} |
|
|
|
if (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN)) { |
|
assert(writer->list_depth == 0); |
|
copy_node(&writer->list_subj, node); |
|
++writer->list_depth; |
|
++writer->indent; |
|
return write_sep(writer, SEP_LIST_BEGIN); |
|
} |
|
|
|
if (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN)) { |
|
++writer->indent; |
|
++writer->list_depth; |
|
return write_sep(writer, SEP_LIST_BEGIN); |
|
} |
|
|
|
if ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) || |
|
(field == FIELD_OBJECT && (flags & SERD_EMPTY_O))) { |
|
return sink("[]", 2, writer) == 2; |
|
} |
|
} |
|
|
|
sink("_:", 2, writer); |
|
if (writer->bprefix && !strncmp((const char*)node->buf, |
|
(const char*)writer->bprefix, |
|
writer->bprefix_len)) { |
|
sink(node->buf + writer->bprefix_len, |
|
node->n_bytes - writer->bprefix_len, |
|
writer); |
|
} else { |
|
sink(node->buf, node->n_bytes, writer); |
|
} |
|
|
|
return true; |
|
} |
|
|
|
static bool |
|
write_node(SerdWriter* writer, |
|
const SerdNode* node, |
|
const SerdNode* datatype, |
|
const SerdNode* lang, |
|
Field field, |
|
SerdStatementFlags flags) |
|
{ |
|
bool ret = false; |
|
switch (node->type) { |
|
case SERD_NOTHING: |
|
break; |
|
case SERD_LITERAL: |
|
ret = write_literal(writer, node, datatype, lang, flags); |
|
break; |
|
case SERD_URI: |
|
ret = write_uri_node(writer, node, field, flags); |
|
break; |
|
case SERD_CURIE: |
|
ret = write_curie(writer, node, field, flags); |
|
break; |
|
case SERD_BLANK: |
|
ret = write_blank(writer, node, field, flags); |
|
break; |
|
} |
|
|
|
writer->last_sep = SEP_NONE; |
|
return ret; |
|
} |
|
|
|
static inline bool |
|
is_resource(const SerdNode* node) |
|
{ |
|
return node && node->buf && node->type > SERD_LITERAL; |
|
} |
|
|
|
static void |
|
write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred) |
|
{ |
|
write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags); |
|
write_sep(writer, SEP_P_O); |
|
copy_node(&writer->context.predicate, pred); |
|
} |
|
|
|
static bool |
|
write_list_obj(SerdWriter* writer, |
|
SerdStatementFlags flags, |
|
const SerdNode* predicate, |
|
const SerdNode* object, |
|
const SerdNode* datatype, |
|
const SerdNode* lang) |
|
{ |
|
if (!strcmp((const char*)object->buf, NS_RDF "nil")) { |
|
--writer->indent; |
|
write_sep(writer, SEP_LIST_END); |
|
return true; |
|
} |
|
|
|
if (!strcmp((const char*)predicate->buf, NS_RDF "first")) { |
|
write_sep(writer, SEP_LIST_SEP); |
|
write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); |
|
} |
|
|
|
return false; |
|
} |
|
|
|
SerdStatus |
|
serd_writer_write_statement(SerdWriter* writer, |
|
SerdStatementFlags flags, |
|
const SerdNode* graph, |
|
const SerdNode* subject, |
|
const SerdNode* predicate, |
|
const SerdNode* object, |
|
const SerdNode* datatype, |
|
const SerdNode* lang) |
|
{ |
|
if (!is_resource(subject) || !is_resource(predicate) || !object || |
|
!object->buf) { |
|
return SERD_ERR_BAD_ARG; |
|
} |
|
|
|
#define TRY(write_result) \ |
|
do { \ |
|
if (!(write_result)) { \ |
|
return SERD_ERR_UNKNOWN; \ |
|
} \ |
|
} while (0) |
|
|
|
if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { |
|
TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); |
|
sink(" ", 1, writer); |
|
TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags)); |
|
sink(" ", 1, writer); |
|
TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags)); |
|
if (writer->syntax == SERD_NQUADS && graph) { |
|
sink(" ", 1, writer); |
|
TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); |
|
} |
|
sink(" .\n", 3, writer); |
|
return SERD_SUCCESS; |
|
} |
|
|
|
if ((graph && !serd_node_equals(graph, &writer->context.graph)) || |
|
(!graph && writer->context.graph.type)) { |
|
writer->indent = 0; |
|
|
|
if (writer->context.subject.type) { |
|
write_sep(writer, SEP_END_S); |
|
} |
|
|
|
if (writer->context.graph.type) { |
|
write_sep(writer, SEP_GRAPH_END); |
|
} |
|
|
|
reset_context(writer, true); |
|
if (graph) { |
|
TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); |
|
++writer->indent; |
|
write_sep(writer, SEP_GRAPH_BEGIN); |
|
copy_node(&writer->context.graph, graph); |
|
} |
|
} |
|
|
|
if ((flags & SERD_LIST_CONT)) { |
|
if (write_list_obj(writer, flags, predicate, object, datatype, lang)) { |
|
// Reached end of list |
|
if (--writer->list_depth == 0 && writer->list_subj.type) { |
|
reset_context(writer, false); |
|
serd_node_free(&writer->context.subject); |
|
writer->context.subject = writer->list_subj; |
|
writer->list_subj = SERD_NODE_NULL; |
|
} |
|
return SERD_SUCCESS; |
|
} |
|
} else if (serd_node_equals(subject, &writer->context.subject)) { |
|
if (serd_node_equals(predicate, &writer->context.predicate)) { |
|
// Abbreviate S P |
|
if (!(flags & SERD_ANON_O_BEGIN)) { |
|
++writer->indent; |
|
} |
|
write_sep(writer, SEP_END_O); |
|
write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); |
|
if (!(flags & SERD_ANON_O_BEGIN)) { |
|
--writer->indent; |
|
} |
|
} else { |
|
// Abbreviate S |
|
Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P; |
|
write_sep(writer, sep); |
|
write_pred(writer, flags, predicate); |
|
write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); |
|
} |
|
} else { |
|
// No abbreviation |
|
if (writer->context.subject.type) { |
|
assert(writer->indent > 0); |
|
--writer->indent; |
|
if (serd_stack_is_empty(&writer->anon_stack)) { |
|
write_sep(writer, SEP_END_S); |
|
} |
|
} else if (!writer->empty) { |
|
write_sep(writer, SEP_S_P); |
|
} |
|
|
|
if (!(flags & SERD_ANON_CONT)) { |
|
write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); |
|
++writer->indent; |
|
write_sep(writer, SEP_S_P); |
|
} else { |
|
++writer->indent; |
|
} |
|
|
|
reset_context(writer, false); |
|
copy_node(&writer->context.subject, subject); |
|
|
|
if (!(flags & SERD_LIST_S_BEGIN)) { |
|
write_pred(writer, flags, predicate); |
|
} |
|
|
|
write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); |
|
} |
|
|
|
if (flags & (SERD_ANON_S_BEGIN | SERD_ANON_O_BEGIN)) { |
|
WriteContext* ctx = |
|
(WriteContext*)serd_stack_push(&writer->anon_stack, sizeof(WriteContext)); |
|
*ctx = writer->context; |
|
WriteContext new_context = { |
|
serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL}; |
|
if ((flags & SERD_ANON_S_BEGIN)) { |
|
new_context.predicate = serd_node_copy(predicate); |
|
} |
|
writer->context = new_context; |
|
} else { |
|
copy_node(&writer->context.graph, graph); |
|
copy_node(&writer->context.subject, subject); |
|
copy_node(&writer->context.predicate, predicate); |
|
} |
|
|
|
return SERD_SUCCESS; |
|
} |
|
|
|
SerdStatus |
|
serd_writer_end_anon(SerdWriter* writer, const SerdNode* node) |
|
{ |
|
if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { |
|
return SERD_SUCCESS; |
|
} |
|
|
|
if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) { |
|
w_err(writer, SERD_ERR_UNKNOWN, "unexpected end of anonymous node\n"); |
|
return SERD_ERR_UNKNOWN; |
|
} |
|
|
|
--writer->indent; |
|
write_sep(writer, SEP_ANON_END); |
|
free_context(writer); |
|
writer->context = *anon_stack_top(writer); |
|
serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); |
|
const bool is_subject = serd_node_equals(node, &writer->context.subject); |
|
if (is_subject) { |
|
copy_node(&writer->context.subject, node); |
|
writer->context.predicate.type = SERD_NOTHING; |
|
} |
|
|
|
return SERD_SUCCESS; |
|
} |
|
|
|
SerdStatus |
|
serd_writer_finish(SerdWriter* writer) |
|
{ |
|
if (writer->context.subject.type) { |
|
write_sep(writer, SEP_END_S); |
|
} |
|
|
|
if (writer->context.graph.type) { |
|
write_sep(writer, SEP_GRAPH_END); |
|
} |
|
|
|
serd_byte_sink_flush(&writer->byte_sink); |
|
writer->indent = 0; |
|
return free_context(writer); |
|
} |
|
|
|
SerdWriter* |
|
serd_writer_new(SerdSyntax syntax, |
|
SerdStyle style, |
|
SerdEnv* env, |
|
const SerdURI* base_uri, |
|
SerdSink ssink, |
|
void* stream) |
|
{ |
|
const WriteContext context = WRITE_CONTEXT_NULL; |
|
SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); |
|
|
|
writer->syntax = syntax; |
|
writer->style = style; |
|
writer->env = env; |
|
writer->root_node = SERD_NODE_NULL; |
|
writer->root_uri = SERD_URI_NULL; |
|
writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL; |
|
writer->anon_stack = serd_stack_new(4 * sizeof(WriteContext)); |
|
writer->context = context; |
|
writer->list_subj = SERD_NODE_NULL; |
|
writer->empty = true; |
|
writer->byte_sink = serd_byte_sink_new( |
|
ssink, stream, (style & SERD_STYLE_BULK) ? SERD_PAGE_SIZE : 1); |
|
|
|
return writer; |
|
} |
|
|
|
void |
|
serd_writer_set_error_sink(SerdWriter* writer, |
|
SerdErrorSink error_sink, |
|
void* error_handle) |
|
{ |
|
writer->error_sink = error_sink; |
|
writer->error_handle = error_handle; |
|
} |
|
|
|
void |
|
serd_writer_chop_blank_prefix(SerdWriter* writer, const uint8_t* prefix) |
|
{ |
|
free(writer->bprefix); |
|
writer->bprefix_len = 0; |
|
writer->bprefix = NULL; |
|
|
|
const size_t prefix_len = prefix ? strlen((const char*)prefix) : 0; |
|
if (prefix_len) { |
|
writer->bprefix_len = prefix_len; |
|
writer->bprefix = (uint8_t*)malloc(writer->bprefix_len + 1); |
|
memcpy(writer->bprefix, prefix, writer->bprefix_len + 1); |
|
} |
|
} |
|
|
|
SerdStatus |
|
serd_writer_set_base_uri(SerdWriter* writer, const SerdNode* uri) |
|
{ |
|
if (!serd_env_set_base_uri(writer->env, uri)) { |
|
serd_env_get_base_uri(writer->env, &writer->base_uri); |
|
|
|
if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { |
|
if (writer->context.graph.type || writer->context.subject.type) { |
|
sink(" .\n\n", 4, writer); |
|
reset_context(writer, true); |
|
} |
|
sink("@base <", 7, writer); |
|
sink(uri->buf, uri->n_bytes, writer); |
|
sink("> .\n", 4, writer); |
|
} |
|
writer->indent = 0; |
|
return reset_context(writer, true); |
|
} |
|
|
|
return SERD_ERR_UNKNOWN; |
|
} |
|
|
|
SerdStatus |
|
serd_writer_set_root_uri(SerdWriter* writer, const SerdNode* uri) |
|
{ |
|
serd_node_free(&writer->root_node); |
|
|
|
if (uri && uri->buf) { |
|
writer->root_node = serd_node_copy(uri); |
|
serd_uri_parse(uri->buf, &writer->root_uri); |
|
} else { |
|
writer->root_node = SERD_NODE_NULL; |
|
writer->root_uri = SERD_URI_NULL; |
|
} |
|
|
|
return SERD_SUCCESS; |
|
} |
|
|
|
SerdStatus |
|
serd_writer_set_prefix(SerdWriter* writer, |
|
const SerdNode* name, |
|
const SerdNode* uri) |
|
{ |
|
if (!serd_env_set_prefix(writer->env, name, uri)) { |
|
if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { |
|
if (writer->context.graph.type || writer->context.subject.type) { |
|
sink(" .\n\n", 4, writer); |
|
reset_context(writer, true); |
|
} |
|
sink("@prefix ", 8, writer); |
|
sink(name->buf, name->n_bytes, writer); |
|
sink(": <", 3, writer); |
|
write_uri(writer, uri->buf, uri->n_bytes); |
|
sink("> .\n", 4, writer); |
|
} |
|
writer->indent = 0; |
|
return reset_context(writer, true); |
|
} |
|
|
|
return SERD_ERR_UNKNOWN; |
|
} |
|
|
|
void |
|
serd_writer_free(SerdWriter* writer) |
|
{ |
|
if (!writer) { |
|
return; |
|
} |
|
|
|
serd_writer_finish(writer); |
|
serd_stack_free(&writer->anon_stack); |
|
free(writer->bprefix); |
|
serd_byte_sink_free(&writer->byte_sink); |
|
serd_node_free(&writer->root_node); |
|
free(writer); |
|
} |
|
|
|
SerdEnv* |
|
serd_writer_get_env(SerdWriter* writer) |
|
{ |
|
return writer->env; |
|
} |
|
|
|
/// Sink function that writes to the `FILE*` passed as `stream`.
/// Returns the number of bytes written, as reported by fwrite().
size_t
serd_file_sink(const void* buf, size_t len, void* stream)
{
  FILE* const file = (FILE*)stream;

  return fwrite(buf, 1, len, file);
}
|
|
|
size_t |
|
serd_chunk_sink(const void* buf, size_t len, void* stream) |
|
{ |
|
SerdChunk* chunk = (SerdChunk*)stream; |
|
chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len); |
|
memcpy((uint8_t*)chunk->buf + chunk->len, buf, len); |
|
chunk->len += len; |
|
return len; |
|
} |
|
|
|
uint8_t* |
|
serd_chunk_sink_finish(SerdChunk* stream) |
|
{ |
|
serd_chunk_sink("", 1, stream); |
|
return (uint8_t*)stream->buf; |
|
}
|
|
|