Browse Source

Preliminary support for writing abbreviated Turtle.

git-svn-id: http://svn.drobilla.net/serd/trunk@22 490d8e77-9747-427b-9fa3-0b8f29cee8a0
zrythm_meson
David Robillard 12 years ago
parent
commit
ff9510dc36
  1. 21
      serd/serd.h
  2. 54
      src/serdi.c
  3. 227
      src/writer.c
  4. 36
      wscript

21
serd/serd.h

@ -244,10 +244,17 @@ serd_reader_free(SerdReader reader); @@ -244,10 +244,17 @@ serd_reader_free(SerdReader reader);
* @{
*/
typedef enum {
SERD_STYLE_ABBREVIATED = 1,
SERD_STYLE_ASCII = 1 << 1,
SERD_STYLE_ESCAPE_
} SerdStyle;
/** Create a new RDF writer. */
SERD_API
SerdWriter
serd_writer_new(SerdSyntax syntax,
SerdStyle style,
SerdNamespaces ns,
const SerdURI* base_uri,
SerdSink sink,
@ -258,12 +265,19 @@ SERD_API @@ -258,12 +265,19 @@ SERD_API
void
serd_writer_free(SerdWriter writer);
/** Set the base URI of writer. */
/** Set the current output base URI. */
SERD_API
void
serd_writer_set_base_uri(SerdWriter writer,
const SerdURI* uri);
/** Write a namespace prefix declaration to the output.
 *
 * For any syntax other than NTriples the writer emits a directive of the
 * form `@prefix name: <uri> .`; for NTriples nothing is written.
 *
 * @param writer Writer to emit the directive on.
 * @param name   Prefix name (written without the trailing colon).
 * @param uri    URI the prefix expands to.
 */
SERD_API
void
serd_writer_set_prefix(SerdWriter writer,
const SerdString* name,
const SerdString* uri);
/** Write a statement. */
SERD_API
bool
@ -278,6 +292,11 @@ serd_writer_write_statement(SerdWriter writer, @@ -278,6 +292,11 @@ serd_writer_write_statement(SerdWriter writer,
const SerdString* object_datatype,
const SerdString* object_lang);
/** Finish a write.
 *
 * If the writer has written a statement that is not yet terminated (which
 * only happens when statements are being abbreviated), this writes the
 * closing " ." and a newline.  Otherwise it writes nothing.
 *
 * @param writer Writer whose pending output should be terminated.
 */
SERD_API
void
serd_writer_finish(SerdWriter writer);
/** @} */
/** @} */

54
src/serdi.c

@ -90,6 +90,8 @@ event_prefix(void* handle, @@ -90,6 +90,8 @@ event_prefix(void* handle,
} else {
serd_namespaces_add(state->ns, name, uri_string);
}
serd_writer_set_prefix(state->writer, name, uri_string);
return true;
}
@ -133,13 +135,39 @@ file_sink(const void* buf, size_t len, void* stream) @@ -133,13 +135,39 @@ file_sink(const void* buf, size_t len, void* stream)
int
main(int argc, char** argv)
{
if (argc != 2 && argc != 3) {
if (argc < 2) {
return print_usage(argv[0], true);
}
const uint8_t* in_filename = (const uint8_t*)argv[1];
FILE* in_fd = NULL;
SerdSyntax output_syntax = SERD_NTRIPLES;
int a = 1;
for (; a < argc && argv[a][0] == '-'; ++a) {
if (argv[a][1] == '\0') {
in_fd = stdin;
break;
} else if (argv[a][1] == 'o') {
if (++a == argc) {
fprintf(stderr, "missing value for -i\n");
return 1;
}
if (!strcmp(argv[a], "turtle")) {
output_syntax = SERD_TURTLE;
} else if (!strcmp(argv[a], "ntriples")) {
output_syntax = SERD_NTRIPLES;
} else {
fprintf(stderr, "unknown output format `%s'\n", argv[a]);
}
} else {
fprintf(stderr, "unknown option `%s'\n", argv[a]);
return print_usage(argv[0], true);
}
}
const uint8_t* in_filename = (const uint8_t*)argv[a];
if (serd_uri_string_has_scheme(in_filename)) {
if (!in_fd && serd_uri_string_has_scheme(in_filename)) {
// Input is an absolute URI, ensure it's a file: URI and chop scheme
if (strncmp((const char*)in_filename, "file:", 5)) {
fprintf(stderr, "unsupported URI scheme `%s'\n", in_filename);
@ -166,8 +194,11 @@ main(int argc, char** argv) @@ -166,8 +194,11 @@ main(int argc, char** argv)
serd_uri_parse(base_uri_str->buf, &base_uri);
FILE* const in_fd = fopen((const char*)in_filename, "r");
FILE* out_fd = stdout;
if (!in_fd) {
in_fd = fopen((const char*)in_filename, "r");
}
FILE* out_fd = stdout;
if (!in_fd) {
fprintf(stderr, "failed to open file %s\n", in_filename);
@ -175,8 +206,14 @@ main(int argc, char** argv) @@ -175,8 +206,14 @@ main(int argc, char** argv)
}
SerdNamespaces ns = serd_namespaces_new();
SerdStyle output_style = (output_syntax == SERD_NTRIPLES)
? SERD_STYLE_ASCII
: SERD_STYLE_ABBREVIATED;
State state = {
serd_writer_new(SERD_NTRIPLES, ns, &base_uri, file_sink, out_fd),
serd_writer_new(output_syntax, output_style,
ns, &base_uri, file_sink, out_fd),
ns, base_uri_str, base_uri
};
@ -186,8 +223,11 @@ main(int argc, char** argv) @@ -186,8 +223,11 @@ main(int argc, char** argv)
const bool success = serd_reader_read_file(reader, in_fd, in_filename);
serd_reader_free(reader);
fclose(in_fd);
serd_namespaces_free(state.ns);
serd_writer_finish(state.writer);
serd_writer_free(state.writer);
serd_namespaces_free(state.ns);
serd_string_free(state.base_uri_str);
if (success) {

227
src/writer.c

@ -21,16 +21,48 @@ @@ -21,16 +21,48 @@
#include "serd/serd.h"
/** Signature of a function that writes one complete statement.
 *
 * serd_writer_new() stores either the abbreviated or the flat implementation
 * in SerdWriterImpl::write_statement depending on the requested style, and
 * serd_writer_write_statement() forwards its arguments to it unchanged.
 * The parameters mirror those of serd_writer_write_statement().
 */
typedef bool (*StatementWriter)(SerdWriter writer,
const SerdString* graph,
const SerdString* subject,
SerdNodeType subject_type,
const SerdString* predicate,
SerdNodeType predicate_type,
const SerdString* object,
SerdNodeType object_type,
const SerdString* object_datatype,
const SerdString* object_lang);
/** Signature of a function that writes a single node.
 *
 * Matches write_node(), which serd_writer_new() installs in
 * SerdWriterImpl::write_node.  `datatype` and `lang` are only consulted for
 * literal nodes; callers pass NULL for them when writing other node types.
 */
typedef bool (*NodeWriter)(SerdWriter writer,
SerdNodeType type,
const SerdString* str,
const SerdString* datatype,
const SerdString* lang);
struct SerdWriterImpl {
SerdSyntax syntax;
SerdNamespaces ns;
SerdURI base_uri;
SerdSink sink;
void* stream;
SerdSyntax syntax;
SerdStyle style;
SerdNamespaces ns;
SerdURI base_uri;
SerdSink sink;
void* stream;
StatementWriter write_statement;
NodeWriter write_node;
const SerdString* prev_g;
const SerdString* prev_s;
const SerdString* prev_p;
const SerdString* prev_o;
unsigned indent;
};
/** Kind of text being written, passed to write_text() as its `ctx` argument.
 *
 * The call sites in this file pass WRITE_URI for text between '<' and '>'
 * and WRITE_STRING for the body of a quoted literal.
 * NOTE(review): no caller visible here passes WRITE_NORMAL -- confirm
 * whether it is used elsewhere or reserved for later.
 */
typedef enum {
WRITE_NORMAL,
WRITE_URI,
WRITE_STRING
} WriteContext;
static bool
serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const uint8_t esc)
write_text(SerdWriter writer, WriteContext ctx,
const uint8_t* utf8, size_t n_bytes, uint8_t terminator)
{
char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (size_t i = 0; i < n_bytes;) {
@ -40,12 +72,16 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u @@ -40,12 +72,16 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u
case '\n': writer->sink("\\n", 2, writer->stream); continue;
case '\r': writer->sink("\\r", 2, writer->stream); continue;
case '\t': writer->sink("\\t", 2, writer->stream); continue;
case '"': if (esc == '"') { writer->sink("\\\"", 2, writer->stream); continue; }
case '"':
if (terminator == '"') {
writer->sink("\\\"", 2, writer->stream);
continue;
} // else fall-through
default: break;
}
if (in == esc) {
sprintf(escape, "\\u%04X", esc);
if (in == terminator) {
sprintf(escape, "\\u%04X", terminator);
writer->sink(escape, 6, writer->stream);
continue;
}
@ -79,6 +115,13 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u @@ -79,6 +115,13 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u
return false;
}
if (!(writer->style & SERD_STYLE_ASCII)) {
// Write UTF-8 input directly to UTF-8 output
writer->sink(utf8, n_bytes, writer->stream);
i += n_bytes - 1;
continue;
}
#define READ_BYTE() do { \
assert(i < n_bytes); \
in = utf8[i++] & 0x3f; \
@ -106,11 +149,11 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u @@ -106,11 +149,11 @@ serd_write_ascii(SerdWriter writer, const uint8_t* utf8, size_t n_bytes, const u
}
static bool
serd_write_node(SerdWriter writer,
SerdNodeType type,
const SerdString* str,
const SerdString* datatype,
const SerdString* lang)
write_node(SerdWriter writer,
SerdNodeType type,
const SerdString* str,
const SerdString* datatype,
const SerdString* lang)
{
const SerdURI* base_uri = &writer->base_uri;
SerdNamespaces ns = writer->ns;
@ -123,14 +166,20 @@ serd_write_node(SerdWriter writer, @@ -123,14 +166,20 @@ serd_write_node(SerdWriter writer,
writer->sink(str->buf, str->n_bytes - 1, writer->stream);
break;
case QNAME:
if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) {
fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf);
return false;
switch (writer->syntax) {
case SERD_NTRIPLES:
if (!serd_namespaces_expand(ns, str, &uri_prefix, &uri_suffix)) {
fprintf(stderr, "error: undefined namespace prefix `%s'\n", str->buf);
return false;
}
writer->sink("<", 1, writer->stream);
write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>');
write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>');
writer->sink(">", 1, writer->stream);
break;
case SERD_TURTLE:
writer->sink(str->buf, str->n_bytes - 1, writer->stream);
}
writer->sink("<", 1, writer->stream);
serd_write_ascii(writer, uri_prefix.buf, uri_prefix.len, '>');
serd_write_ascii(writer, uri_suffix.buf, uri_suffix.len, '>');
writer->sink(">", 1, writer->stream);
break;
case URI:
if (!serd_uri_string_has_scheme(str->buf)) {
@ -146,27 +195,44 @@ serd_write_node(SerdWriter writer, @@ -146,27 +195,44 @@ serd_write_node(SerdWriter writer,
}
} else {
writer->sink("<", 1, writer->stream);
serd_write_ascii(writer, str->buf, str->n_bytes - 1, '>');
write_text(writer, WRITE_URI, str->buf, str->n_bytes - 1, '>');
writer->sink(">", 1, writer->stream);
return true;
}
return false;
case LITERAL:
writer->sink("\"", 1, writer->stream);
serd_write_ascii(writer, str->buf, str->n_bytes - 1, '"');
write_text(writer, WRITE_STRING, str->buf, str->n_bytes - 1, '"');
writer->sink("\"", 1, writer->stream);
if (lang) {
writer->sink("@", 1, writer->stream);
writer->sink(lang->buf, lang->n_bytes - 1, writer->stream);
} else if (datatype) {
writer->sink("^^", 2, writer->stream);
serd_write_node(writer, URI, datatype, NULL, NULL);
write_node(writer, URI, datatype, NULL, NULL);
}
break;
}
return true;
}
/** Write a delimiter, then start a new line at the current indent level.
 *
 * A `delim` of 0 or '\n' means "no punctuation": only the line break and
 * indentation are written.  Any other byte is written preceded by a single
 * space.  The new line is indented with one tab per level of
 * `writer->indent`.
 */
static void
serd_writer_write_delim(SerdWriter writer, const uint8_t delim)
{
	const bool has_punctuation = (delim != 0 && delim != '\n');

	if (has_punctuation) {
		writer->sink(" ", 1, writer->stream);
		writer->sink(&delim, 1, writer->stream);
	}

	writer->sink("\n", 1, writer->stream);

	// One tab per indent level
	unsigned remaining = writer->indent;
	while (remaining > 0) {
		writer->sink("\t", 1, writer->stream);
		--remaining;
	}
}
SERD_API
bool
serd_writer_write_statement(SerdWriter writer,
@ -180,18 +246,98 @@ serd_writer_write_statement(SerdWriter writer, @@ -180,18 +246,98 @@ serd_writer_write_statement(SerdWriter writer,
const SerdString* object_datatype,
const SerdString* object_lang)
{
serd_write_node(writer, subject_type, subject, NULL, NULL);
return writer->write_statement(writer,
graph,
subject, subject_type,
predicate, predicate_type,
object, object_type, object_datatype, object_lang);
}
/** Write a statement, abbreviating it against the previously written one.
 *
 * Three cases are distinguished by comparing the incoming nodes with those
 * remembered from the previous call:
 *  - same subject and predicate: the object is appended after a ','
 *  - same subject only: the predicate and object are appended after a ';'
 *  - different subject: any previous statement is closed with '.', then the
 *    subject, predicate and object are written in full
 *
 * Nodes are compared by pointer identity, not by content, and the pointers
 * passed in are stored in the writer as-is for the next comparison.
 * NOTE(review): this presumably relies on the caller passing the same
 * SerdString object for repeated nodes and keeping it alive -- confirm.
 *
 * The final statement is left unterminated; serd_writer_finish() writes the
 * closing " .".  Always returns true (results of write_node are not checked).
 */
static bool
serd_writer_write_statement_abbrev(SerdWriter        writer,
                                   const SerdString* graph,
                                   const SerdString* subject,
                                   SerdNodeType      subject_type,
                                   const SerdString* predicate,
                                   SerdNodeType      predicate_type,
                                   const SerdString* object,
                                   SerdNodeType      object_type,
                                   const SerdString* object_datatype,
                                   const SerdString* object_lang)
{
	assert(subject && predicate && object);

	const bool same_subject   = (subject == writer->prev_s);
	const bool same_predicate = same_subject && (predicate == writer->prev_p);

	if (same_predicate) {
		// Continue the object list one level deeper than the predicate
		++writer->indent;
		serd_writer_write_delim(writer, ',');
		write_node(writer, object_type, object, object_datatype, object_lang);
		--writer->indent;
	} else {
		if (same_subject) {
			// Continue the predicate list of the current subject
			serd_writer_write_delim(writer, ';');
		} else {
			if (writer->prev_s) {
				// Close the previous subject and leave a blank line
				--writer->indent;
				serd_writer_write_delim(writer, '.');
				serd_writer_write_delim(writer, '\n');
			}
			write_node(writer, subject_type, subject, NULL, NULL);
			++writer->indent;
			serd_writer_write_delim(writer, 0);
			writer->sink(" ", 1, writer->stream);
		}

		// Both remaining cases end with "predicate object"
		write_node(writer, predicate_type, predicate, NULL, NULL);
		writer->sink(" ", 1, writer->stream);
		write_node(writer, object_type, object, object_datatype, object_lang);
	}

	// Remember this statement for the next comparison
	writer->prev_o = object;
	writer->prev_p = predicate;
	writer->prev_s = subject;
	writer->prev_g = graph;
	return true;
}
SERD_API
bool
serd_writer_write_statement_flat(SerdWriter writer,
const SerdString* graph,
const SerdString* subject,
SerdNodeType subject_type,
const SerdString* predicate,
SerdNodeType predicate_type,
const SerdString* object,
SerdNodeType object_type,
const SerdString* object_datatype,
const SerdString* object_lang)
{
assert(subject && predicate && object);
write_node(writer, subject_type, subject, NULL, NULL);
writer->sink(" ", 1, writer->stream);
serd_write_node(writer, predicate_type, predicate, NULL, NULL);
write_node(writer, predicate_type, predicate, NULL, NULL);
writer->sink(" ", 1, writer->stream);
serd_write_node(writer, object_type, object, object_datatype, object_lang);
write_node(writer, object_type, object, object_datatype, object_lang);
writer->sink(" .\n", 3, writer->stream);
return true;
}
/** Finish a write.
 *
 * If a statement has been written but not yet terminated (the abbreviated
 * writer leaves the last statement open so it can be continued), write the
 * closing " ." and newline.
 *
 * The remembered statement and indent level are then cleared, so that
 * calling this again writes nothing and a statement written afterwards
 * starts a fresh sentence instead of being joined with ';' or ',' to the
 * one that was just terminated.
 */
SERD_API
void
serd_writer_finish(SerdWriter writer)
{
	if (!writer->prev_s) {
		return;  // Nothing pending
	}

	writer->sink(" .\n", 3, writer->stream);

	// Forget the terminated statement and return to top-level indentation
	writer->prev_g = NULL;
	writer->prev_s = NULL;
	writer->prev_p = NULL;
	writer->prev_o = NULL;
	writer->indent = 0;
}
SERD_API
SerdWriter
serd_writer_new(SerdSyntax syntax,
SerdStyle style,
SerdNamespaces ns,
const SerdURI* base_uri,
SerdSink sink,
@ -199,10 +345,22 @@ serd_writer_new(SerdSyntax syntax, @@ -199,10 +345,22 @@ serd_writer_new(SerdSyntax syntax,
{
SerdWriter writer = malloc(sizeof(struct SerdWriterImpl));
writer->syntax = syntax;
writer->style = style;
writer->ns = ns;
writer->base_uri = *base_uri;
writer->sink = sink;
writer->stream = stream;
writer->prev_g = 0;
writer->prev_s = 0;
writer->prev_p = 0;
writer->prev_o = 0;
writer->indent = 0;
writer->write_node = write_node;
if ((style & SERD_STYLE_ABBREVIATED)) {
writer->write_statement = serd_writer_write_statement_abbrev;
} else {
writer->write_statement = serd_writer_write_statement_flat;
}
return writer;
}
@ -214,6 +372,21 @@ serd_writer_set_base_uri(SerdWriter writer, @@ -214,6 +372,21 @@ serd_writer_set_base_uri(SerdWriter writer,
writer->base_uri = *uri;
}
/** Write a namespace prefix declaration to the output.
 *
 * For NTriples nothing is written.  For any other syntax a directive of the
 * form `@prefix name: <uri> .` is written on its own line.
 *
 * A directive cannot appear inside a statement, so if the abbreviated
 * writer has left a statement open it is terminated with " ." first and the
 * remembered statement and indent level are cleared; the next statement
 * then starts a fresh sentence.
 *
 * @param writer Writer to emit the directive on.
 * @param name   Prefix name (written without the trailing colon).
 * @param uri    URI the prefix expands to.
 */
SERD_API
void
serd_writer_set_prefix(SerdWriter        writer,
                       const SerdString* name,
                       const SerdString* uri)
{
	if (writer->syntax == SERD_NTRIPLES) {
		return;  // Nothing is written for NTriples
	}

	if (writer->prev_s) {
		// Close the open statement, matching the " ." + blank line that
		// is written between subjects
		writer->sink(" .\n\n", 4, writer->stream);
		writer->prev_g = NULL;
		writer->prev_s = NULL;
		writer->prev_p = NULL;
		writer->prev_o = NULL;
		writer->indent = 0;
	}

	writer->sink("@prefix ", 8, writer->stream);
	writer->sink(name->buf, name->n_bytes - 1, writer->stream);
	writer->sink(": <", 3, writer->stream);
	write_text(writer, WRITE_URI, uri->buf, uri->n_bytes - 1, '>');
	writer->sink("> .\n", 4, writer->stream);
}
SERD_API
void
serd_writer_free(SerdWriter writer)

36
wscript

@ -124,10 +124,20 @@ def test(ctx): @@ -124,10 +124,20 @@ def test(ctx):
autowaf.pre_test(ctx, APPNAME)
autowaf.run_tests(ctx, APPNAME,
['./serdi_static > /dev/null',
'./serdi_static ftp://example.org/unsupported.ttl > /dev/null'],
1, name='serdi-fail')
autowaf.run_tests(ctx, APPNAME,
['./serdi_static file:../tests/manifest.ttl > /dev/null',
'./serdi_static ../tests/UTF-8.ttl > /dev/null'],
0, name='serdi-succeed')
commands = []
for test in good_tests:
base_uri = 'http://www.w3.org/2001/sw/DataAccess/df1/' + test
commands = commands + [ './serdi_static ../%s \'%s\' > %s.out' % (test, base_uri, test) ]
commands += [ './serdi_static ../%s \'%s\' > %s.out' % (test, base_uri, test) ]
autowaf.run_tests(ctx, APPNAME, commands, 0, name='good')
@ -145,24 +155,16 @@ def test(ctx): @@ -145,24 +155,16 @@ def test(ctx):
commands = []
for test in bad_tests:
commands = commands + [ './serdi_static ../%s \'http://www.w3.org/2001/sw/DataAccess/df1/%s\' > %s.out' % (test, test, test) ]
commands += [ './serdi_static ../%s \'http://www.w3.org/2001/sw/DataAccess/df1/%s\' > %s.out' % (test, test, test) ]
autowaf.run_tests(ctx, APPNAME, commands, 1, name='bad')
autowaf.run_tests(ctx, APPNAME,
['./serdi_static > /dev/null'],
1, name='serdi-no-args')
autowaf.run_tests(ctx, APPNAME,
['./serdi_static file:../tests/manifest.ttl > /dev/null'],
0, name='serdi-file-uri')
autowaf.run_tests(ctx, APPNAME,
['./serdi_static ftp://example.org/unsupported.ttl > /dev/null'],
1, name='serdi-bad-uri')
autowaf.run_tests(ctx, APPNAME,
['./serdi_static ../tests/UTF-8.ttl > /dev/null'],
0, name='utf8')
# commands = []
# for test in good_tests:
# out_filename = test + '.thru'
# commands += [ './serdi_static -o turtle ../%s \'%s\' | ./serdi_static - \'%s\' > %s.out' % (test, base_uri, base_uri, test) ]
#
# autowaf.run_tests(ctx, APPNAME, commands, 0, name='turtle-write')
#
autowaf.post_test(ctx, APPNAME)

Loading…
Cancel
Save