Serd subproject with meson
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

409 lines
12 KiB

/* Serd, an RDF serialisation library.
* Copyright 2011 David Robillard <d@drobilla.net>
*
* Serd is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Serd is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include "serd/serd.h"
typedef bool (*StatementWriter)(SerdWriter writer,
const SerdString* graph,
const SerdString* subject,
SerdNodeType subject_type,
const SerdString* predicate,
SerdNodeType predicate_type,
const SerdString* object,
SerdNodeType object_type,
const SerdString* object_datatype,
const SerdString* object_lang);
typedef bool (*NodeWriter)(SerdWriter writer,
SerdNodeType type,
const SerdString* str,
const SerdString* datatype,
const SerdString* lang);
struct SerdWriterImpl {
SerdSyntax syntax;
SerdStyle style;
SerdNamespaces ns;
SerdURI base_uri;
SerdSink sink;
void* stream;
StatementWriter write_statement;
NodeWriter write_node;
const SerdString* prev_g;
const SerdString* prev_s;
const SerdString* prev_p;
unsigned indent;
};
typedef enum {
WRITE_NORMAL,
WRITE_URI,
WRITE_STRING
} WriteContext;
static bool
write_text(SerdWriter writer, WriteContext ctx,
const uint8_t* utf8, size_t n_bytes, uint8_t terminator)
{
char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (size_t i = 0; i < n_bytes;) {
uint8_t in = utf8[i++];
switch (in) {
case '\\': writer->sink("\\\\", 2, writer->stream); continue;
case '\n': writer->sink("\\n", 2, writer->stream); continue;
case '\r': writer->sink("\\r", 2, writer->stream); continue;
case '\t': writer->sink("\\t", 2, writer->stream); continue;
case '"':
if (terminator == '"') {
writer->sink("\\\"", 2, writer->stream);
continue;
} // else fall-through
default: break;
}
if (in == terminator) {
snprintf(escape, 7, "\\u%04X", terminator);
writer->sink(escape, 6, writer->stream);
continue;
}
uint32_t c = 0;
size_t size = 0;
if ((in & 0x80) == 0) { // Starts with `0'
size = 1;
c = in & 0x7F;
if ((in >= 0x20) && (in <= 0x7E)) { // Printable ASCII
writer->sink(&in, 1, writer->stream);
continue;
}
} else if ((in & 0xE0) == 0xC0) { // Starts with `110'
size = 2;
c = in & 0x1F;
} else if ((in & 0xF0) == 0xE0) { // Starts with `1110'
size = 3;
c = in & 0x0F;
} else if ((in & 0xF8) == 0xF0) { // Starts with `11110'
size = 4;
c = in & 0x07;
} else if ((in & 0xFC) == 0xF8) { // Starts with `111110'
size = 5;
c = in & 0x03;
} else if ((in & 0xFE) == 0xFC) { // Starts with `1111110'
size = 6;
c = in & 0x01;
} else {
fprintf(stderr, "invalid UTF-8 at offset %zu: %X\n", i, in);
return false;
}
if (ctx == WRITE_STRING && !(writer->style & SERD_STYLE_ASCII)) {
// Write UTF-8 character directly to UTF-8 output
writer->sink(utf8 + i - 1, size, writer->stream);
i += size - 1;
continue;
}
#define READ_BYTE() do { \
assert(i < n_bytes); \
in = utf8[i++] & 0x3f; \
c <<= 6; \
c |= in; \
} while (0)
switch (size) {
case 6: READ_BYTE();
case 5: READ_BYTE();
case 4: READ_BYTE();
case 3: READ_BYTE();
case 2: READ_BYTE();
}
if (c < 0xFFFF) {
snprintf(escape, 7, "\\u%04X", c);
writer->sink(escape, 6, writer->stream);
} else {
snprintf(escape, 11, "\\U%08X", c);
writer->sink(escape, 10, writer->stream);
}
}
return true;
}
static bool
write_node(SerdWriter writer,
SerdNodeType type,
const SerdString* str,
const SerdString* datatype,
const SerdString* lang)
{
const SerdURI* base_uri = &writer->base_uri;
SerdNamespaces ns = writer->ns;
SerdChunk uri_prefix;
SerdChunk uri_suffix;
switch (type) {
case BLANK:
writer->sink("_:", 2, writer->stream);
writer->sink(str->buf, str->n_bytes - 1, writer->stream);
break;
case QNAME:
switch (writer->syntax) {
case SERD_NTRIPLES:
if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) {
fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf);
return false;
}
writer->sink("<", 1, writer->stream);
write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>');
write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>');
writer->sink(">", 1, writer->stream);
break;
case SERD_TURTLE:
writer->sink(str->buf, str->n_bytes - 1, writer->stream);
}
break;
case URI:
if (!serd_uri_string_has_scheme(str->buf)) {
SerdURI uri;
if (serd_uri_parse(str->buf, &uri)) {
SerdURI abs_uri;
if (serd_uri_resolve(&uri, base_uri, &abs_uri)) {
writer->sink("<", 1, writer->stream);
serd_uri_serialise(&abs_uri, writer->sink, writer->stream);
writer->sink(">", 1, writer->stream);
return true;
}
}
} else {
writer->sink("<", 1, writer->stream);
write_text(writer, WRITE_URI, str->buf, str->n_bytes - 1, '>');
writer->sink(">", 1, writer->stream);
return true;
}
return false;
case LITERAL:
writer->sink("\"", 1, writer->stream);
write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"');
writer->sink("\"", 1, writer->stream);
if (lang) {
writer->sink("@", 1, writer->stream);
writer->sink(lang->buf, lang->n_bytes - 1, writer->stream);
} else if (datatype) {
writer->sink("^^", 2, writer->stream);
write_node(writer, URI, datatype, NULL, NULL);
}
break;
}
return true;
}
static void
serd_writer_write_delim(SerdWriter writer, const uint8_t delim)
{
switch (delim) {
case 0:
case '\n':
break;
default:
writer->sink(" ", 1, writer->stream);
writer->sink(&delim, 1, writer->stream);
}
writer->sink("\n", 1, writer->stream);
for (unsigned i = 0; i < writer->indent; ++i) {
writer->sink("\t", 1, writer->stream);
}
}
SERD_API
bool
serd_writer_write_statement(SerdWriter writer,
const SerdString* graph,
const SerdString* subject,
SerdNodeType subject_type,
const SerdString* predicate,
SerdNodeType predicate_type,
const SerdString* object,
SerdNodeType object_type,
const SerdString* object_datatype,
const SerdString* object_lang)
{
return writer->write_statement(writer,
graph,
subject, subject_type,
predicate, predicate_type,
object, object_type, object_datatype, object_lang);
}
static bool
serd_writer_write_statement_abbrev(SerdWriter writer,
const SerdString* graph,
const SerdString* subject,
SerdNodeType subject_type,
const SerdString* predicate,
SerdNodeType predicate_type,
const SerdString* object,
SerdNodeType object_type,
const SerdString* object_datatype,
const SerdString* object_lang)
{
assert(subject && predicate && object);
if (subject == writer->prev_s) {
if (predicate == writer->prev_p) {
++writer->indent;
serd_writer_write_delim(writer, ',');
write_node(writer, object_type, object, object_datatype, object_lang);
--writer->indent;
} else {
serd_writer_write_delim(writer, ';');
write_node(writer, predicate_type, predicate, NULL, NULL);
writer->sink(" ", 1, writer->stream);
write_node(writer, object_type, object, object_datatype, object_lang);
}
} else {
if (writer->prev_s) {
--writer->indent;
serd_writer_write_delim(writer, '.');
serd_writer_write_delim(writer, '\n');
}
write_node(writer, subject_type, subject, NULL, NULL);
++writer->indent;
serd_writer_write_delim(writer, 0);
writer->sink(" ", 1, writer->stream);
write_node(writer, predicate_type, predicate, NULL, NULL);
writer->sink(" ", 1, writer->stream);
write_node(writer, object_type, object, object_datatype, object_lang);
}
writer->prev_g = graph;
writer->prev_s = subject;
writer->prev_p = predicate;
return true;
}
SERD_API
bool
serd_writer_write_statement_flat(SerdWriter writer,
const SerdString* graph,
const SerdString* subject,
SerdNodeType subject_type,
const SerdString* predicate,
SerdNodeType predicate_type,
const SerdString* object,
SerdNodeType object_type,
const SerdString* object_datatype,
const SerdString* object_lang)
{
assert(subject && predicate && object);
write_node(writer, subject_type, subject, NULL, NULL);
writer->sink(" ", 1, writer->stream);
write_node(writer, predicate_type, predicate, NULL, NULL);
writer->sink(" ", 1, writer->stream);
write_node(writer, object_type, object, object_datatype, object_lang);
writer->sink(" .\n", 3, writer->stream);
return true;
}
SERD_API
void
serd_writer_finish(SerdWriter writer)
{
if (writer->prev_s) {
writer->sink(" .\n", 3, writer->stream);
}
}
SERD_API
SerdWriter
serd_writer_new(SerdSyntax syntax,
SerdStyle style,
SerdNamespaces ns,
const SerdURI* base_uri,
SerdSink sink,
void* stream)
{
SerdWriter writer = malloc(sizeof(struct SerdWriterImpl));
writer->syntax = syntax;
writer->style = style;
writer->ns = ns;
writer->base_uri = *base_uri;
writer->sink = sink;
writer->stream = stream;
writer->prev_g = 0;
writer->prev_s = 0;
writer->prev_p = 0;
writer->indent = 0;
writer->write_node = write_node;
if ((style & SERD_STYLE_ABBREVIATED)) {
writer->write_statement = serd_writer_write_statement_abbrev;
} else {
writer->write_statement = serd_writer_write_statement_flat;
}
return writer;
}
SERD_API
void
serd_writer_set_base_uri(SerdWriter writer,
const SerdURI* uri)
{
writer->base_uri = *uri;
if (writer->syntax != SERD_NTRIPLES) {
if (writer->prev_g || writer->prev_s) {
writer->sink(" .\n\n", 4, writer->stream);
writer->prev_g = writer->prev_s = writer->prev_p = 0;
}
writer->sink("@base ", 6, writer->stream);
writer->sink(" <", 2, writer->stream);
serd_uri_serialise(uri, writer->sink, writer->stream);
writer->sink("> .\n", 4, writer->stream);
}
}
SERD_API
void
serd_writer_set_prefix(SerdWriter writer,
const SerdString* name,
const SerdString* uri)
{
if (writer->syntax != SERD_NTRIPLES) {
if (writer->prev_g || writer->prev_s) {
writer->sink(" .\n\n", 4, writer->stream);
writer->prev_g = 0;
writer->prev_s = 0;
writer->prev_p = 0;
}
writer->sink("@prefix ", 8, writer->stream);
writer->sink(name->buf, name->n_bytes - 1, writer->stream);
writer->sink(": <", 3, writer->stream);
write_text(writer, WRITE_URI, uri->buf, uri->n_bytes - 1, '>');
writer->sink("> .\n", 4, writer->stream);
}
}
SERD_API
void
serd_writer_free(SerdWriter writer)
{
SerdWriter const me = (SerdWriter)writer;
free(me);
}