Browse Source

Add test to ensure blank node IDs don't clash with generated IDs.

Add handle destructor parameter to serd_reader_new.
Add serd_reader_get_handle.
Rename serd_reader_set_blank_prefix to serd_reader_add_blank_prefix.
Rename serd_reader_read_file to serd_reader_read_file_handle.
Add new serd_reader_read_file that takes a path/URI parameter.
Add serdi -i option to select input syntax.
Add serdi -p and -c options to add/chop a prefix to/from blank IDs.
Add optional base_uri parameter to serd_env_new.
Add serd_writer_chop_blank_prefix.
Bump version to 0.3.0.


git-svn-id: http://svn.drobilla.net/serd/trunk@183 490d8e77-9747-427b-9fa3-0b8f29cee8a0
zrythm_meson
David Robillard 12 years ago
parent
commit
b92d598a22
  1. 33
      serd/serd.h
  2. 5
      src/env.c
  3. 159
      src/reader.c
  4. 94
      src/serdi.c
  5. 52
      src/writer.c
  6. 2
      tests/test-id.out
  7. 4
      tests/test-id.ttl
  8. 17
      wscript

33
serd/serd.h

@ -428,7 +428,7 @@ typedef SerdStatus (*SerdEndSink)(void* handle, @@ -428,7 +428,7 @@ typedef SerdStatus (*SerdEndSink)(void* handle,
*/
SERD_API
SerdEnv*
serd_env_new();
serd_env_new(const SerdNode* base_uri);
/**
Free @a ns.
@ -512,11 +512,19 @@ SERD_API @@ -512,11 +512,19 @@ SERD_API
SerdReader*
serd_reader_new(SerdSyntax syntax,
void* handle,
void (*free_handle)(void*),
SerdBaseSink base_sink,
SerdPrefixSink prefix_sink,
SerdStatementSink statement_sink,
SerdEndSink end_sink);
/**
Return the @c handle passed to @ref serd_reader_new.
*/
SERD_API
void*
serd_reader_get_handle(const SerdReader* reader);
/**
Set a prefix to be added to all blank node identifiers.
@ -528,17 +536,26 @@ serd_reader_new(SerdSyntax syntax, @@ -528,17 +536,26 @@ serd_reader_new(SerdSyntax syntax,
*/
SERD_API
void
serd_reader_set_blank_prefix(SerdReader* reader,
serd_reader_add_blank_prefix(SerdReader* reader,
const uint8_t* prefix);
/**
Read the file at @a uri.
@param uri Path or file: URI of file to read.
*/
SERD_API
SerdStatus
serd_reader_read_file(SerdReader* reader,
FILE* file,
const uint8_t* name);
const uint8_t* uri);
/**
Read @a file.
*/
SERD_API
SerdStatus
serd_reader_read_file_handle(SerdReader* reader,
FILE* file,
const uint8_t* name);
/**
Read @a utf8.
@ -579,6 +596,14 @@ SERD_API @@ -579,6 +596,14 @@ SERD_API
void
serd_writer_free(SerdWriter* writer);
/**
Set a prefix to be removed from matching blank node identifiers.
*/
SERD_API
void
serd_writer_chop_blank_prefix(SerdWriter* writer,
const uint8_t* prefix);
/**
Set the current output base URI (and emit directive if applicable).

5
src/env.c

@ -35,13 +35,16 @@ struct SerdEnvImpl { @@ -35,13 +35,16 @@ struct SerdEnvImpl {
SERD_API
SerdEnv*
serd_env_new()
serd_env_new(const SerdNode* base_uri)
{
SerdEnv* env = malloc(sizeof(struct SerdEnvImpl));
env->prefixes = NULL;
env->n_prefixes = 0;
env->base_uri_node = SERD_NODE_NULL;
env->base_uri = SERD_URI_NULL;
if (base_uri) {
serd_env_set_base_uri(env, base_uri);
}
return env;
}

159
src/reader.c

@ -65,6 +65,7 @@ static const Node INTERNAL_NODE_NULL = { 0, 0 }; @@ -65,6 +65,7 @@ static const Node INTERNAL_NODE_NULL = { 0, 0 };
struct SerdReaderImpl {
void* handle;
void (*free_handle)(void*);
SerdBaseSink base_sink;
SerdPrefixSink prefix_sink;
SerdStatementSink statement_sink;
@ -75,9 +76,11 @@ struct SerdReaderImpl { @@ -75,9 +76,11 @@ struct SerdReaderImpl {
Node rdf_nil;
FILE* fd;
SerdStack stack;
SerdSyntax syntax;
Cursor cur;
uint8_t* buf;
const uint8_t* blank_prefix;
uint8_t* bprefix;
size_t bprefix_len;
unsigned next_id;
int err;
uint8_t* read_buf;
@ -241,6 +244,23 @@ push_byte(SerdReader* reader, Ref ref, const uint8_t c) @@ -241,6 +244,23 @@ push_byte(SerdReader* reader, Ref ref, const uint8_t c)
str->buf[str->n_bytes] = '\0';
}
/**
   Append @a suffix to the string referred to by @a ref.

   Space for the suffix is pushed onto the reader stack, then the suffix is
   copied onto the end of the existing bytes and the string's byte and
   character counts are increased by the lengths serd_strlen() reported.
*/
static inline void
append_string(SerdReader* reader, Ref ref, const uint8_t* suffix)
{
#ifdef SERD_STACK_CHECK
	assert(stack_is_top_string(reader, ref));
#endif
	uint32_t     suffix_flags = 0;
	size_t       suffix_bytes;
	const size_t suffix_chars = serd_strlen(suffix, &suffix_bytes, &suffix_flags);

	// Grow the stack before dereferencing; the string is only looked up afterwards
	serd_stack_push(&reader->stack, suffix_bytes);

	SerdString* const dest = deref(reader, ref);
	assert(dest->n_bytes >= dest->n_chars);

	// One byte more than the reported length is copied (presumably the terminator)
	memcpy(dest->buf + dest->n_bytes, suffix, suffix_bytes + 1);
	dest->n_chars += suffix_chars;
	dest->n_bytes += suffix_bytes;
}
static void
pop_string(SerdReader* reader, Ref ref)
{
@ -968,20 +988,33 @@ read_nodeID(SerdReader* reader) @@ -968,20 +988,33 @@ read_nodeID(SerdReader* reader)
{
eat_byte(reader, '_');
eat_byte(reader, ':');
Ref str = push_string(reader, "", 0);
return read_name(reader, str, true);
Ref ref = push_string(reader, "", 0);
read_name(reader, ref, true);
SerdString* const str = deref(reader, ref);
if (reader->syntax == SERD_TURTLE
&& !strncmp((const char*)str->buf, "genid", 5)) {
// Replace "genid" nodes with "docid" to prevent clashing
memcpy(str->buf, "docid", 5);
}
return ref;
}
static Ref
blank_id(SerdReader* reader)
{
const char* prefix = reader->blank_prefix
? (const char*)reader->blank_prefix
: "genid";
char str[32]; // FIXME: ensure length of reader->blank_prefix is OK
const int len = snprintf(str, sizeof(str), "%s%u",
prefix, reader->next_id++);
return push_string(reader, str, len);
Ref str;
if (reader->bprefix) {
str = push_string(reader,
(const char*)reader->bprefix,
reader->bprefix_len);
} else {
str = push_string(reader, "", 0);
}
char num[32];
snprintf(num, sizeof(num), "%u", reader->next_id++);
append_string(reader, str, (const uint8_t*)"genid");
append_string(reader, str, (const uint8_t*)num);
return str;
}
// Spec: [21] blank ::= nodeID | '[]'
@ -1368,29 +1401,33 @@ SERD_API @@ -1368,29 +1401,33 @@ SERD_API
SerdReader*
serd_reader_new(SerdSyntax syntax,
void* handle,
void (*free_handle)(void*),
SerdBaseSink base_sink,
SerdPrefixSink prefix_sink,
SerdStatementSink statement_sink,
SerdEndSink end_sink)
{
const Cursor cur = { NULL, 0, 0 };
SerdReader* me = malloc(sizeof(struct SerdReaderImpl));
me->handle = handle;
me->base_sink = base_sink;
me->prefix_sink = prefix_sink;
me->statement_sink = statement_sink;
me->end_sink = end_sink;
me->fd = 0;
me->stack = serd_stack_new(STACK_PAGE_SIZE);
me->cur = cur;
me->blank_prefix = NULL;
me->next_id = 1;
me->read_buf = 0;
me->read_head = 0;
me->eof = false;
SerdReader* me = malloc(sizeof(struct SerdReaderImpl));
me->handle = handle;
me->free_handle = free_handle;
me->base_sink = base_sink;
me->prefix_sink = prefix_sink;
me->statement_sink = statement_sink;
me->end_sink = end_sink;
me->fd = 0;
me->stack = serd_stack_new(STACK_PAGE_SIZE);
me->syntax = syntax;
me->cur = cur;
me->bprefix = NULL;
me->bprefix_len = 0;
me->next_id = 1;
me->read_buf = 0;
me->read_head = 0;
me->eof = false;
#ifdef SERD_STACK_CHECK
me->alloc_stack = 0;
me->n_allocs = 0;
me->alloc_stack = 0;
me->n_allocs = 0;
#endif
#define RDF_FIRST NS_RDF "first"
@ -1415,20 +1452,84 @@ serd_reader_free(SerdReader* reader) @@ -1415,20 +1452,84 @@ serd_reader_free(SerdReader* reader)
free(reader->alloc_stack);
#endif
free(reader->stack.buf);
free(reader->bprefix);
if (reader->free_handle) {
reader->free_handle(reader->handle);
}
free(reader);
}
/**
   Return the handle stored in @a reader.
*/
SERD_API
void*
serd_reader_get_handle(const SerdReader* reader)
{
	void* const handle = reader->handle;
	return handle;
}
SERD_API
void
serd_reader_set_blank_prefix(SerdReader* reader,
serd_reader_add_blank_prefix(SerdReader* reader,
const uint8_t* prefix)
{
reader->blank_prefix = prefix;
if (reader->bprefix) {
free(reader->bprefix);
reader->bprefix_len = 0;
reader->bprefix = NULL;
}
if (prefix) {
reader->bprefix_len = strlen((const char*)prefix);
reader->bprefix = malloc(reader->bprefix_len + 1);
memcpy(reader->bprefix, prefix, reader->bprefix_len + 1);
}
}
/**
   Map a path or file: URI to the path portion of the same string.

   @param uri String to inspect.
   @return A pointer into @a uri (nothing is allocated): @a uri itself when it
   has no scheme, otherwise the text following the "file:" scheme prefix.
   Returns NULL, after printing a message to stderr, when @a uri has a scheme
   other than "file:".
*/
static const uint8_t*
file_uri_to_path(const uint8_t* uri)
{
	if (!serd_uri_string_has_scheme(uri)) {
		// No scheme, use the string as given
		return uri;
	}

	// Absolute URI, ensure it is a file URI and chop the scheme
	const char* const str = (const char*)uri;
	if (strncmp(str, "file:", 5) != 0) {
		fprintf(stderr, "Unsupported URI scheme `%s'\n", uri);
		return NULL;
	}

#ifdef __WIN32__
	if (strncmp(str, "file:///", 8) == 0) {
		return uri + 8;
	}
#else
	if (strncmp(str, "file://", 7) == 0) {
		return uri + 7;
	}
#endif

	// "file:" without the slashes matched above: skip only the scheme
	return uri + 5;
}
/**
   Open the file named by @a uri and read it with @a reader.

   @a uri is converted with file_uri_to_path(); the resulting path is used
   both to open the file and as the name passed to
   serd_reader_read_file_handle(). The file is closed before returning.

   @return SERD_ERR_BAD_ARG if no path could be derived from @a uri,
   SERD_ERR_UNKNOWN if the file could not be opened, otherwise the status
   returned by serd_reader_read_file_handle().
*/
SERD_API
SerdStatus
serd_reader_read_file(SerdReader*    reader,
                      const uint8_t* uri)
{
	const uint8_t* const filename = file_uri_to_path(uri);
	if (filename == NULL) {
		return SERD_ERR_BAD_ARG;
	}

	FILE* const stream = fopen((const char*)filename, "r");
	if (stream == NULL) {
		return SERD_ERR_UNKNOWN;
	}

	const SerdStatus status =
		serd_reader_read_file_handle(reader, stream, filename);
	fclose(stream);
	return status;
}
SERD_API
SerdStatus
serd_reader_read_file(SerdReader* me, FILE* file, const uint8_t* name)
serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
{
const Cursor cur = { name, 1, 1 };
me->fd = file;

94
src/serdi.c

@ -45,8 +45,11 @@ print_usage(const char* name, bool error) @@ -45,8 +45,11 @@ print_usage(const char* name, bool error)
fprintf(os, "Read and write RDF syntax.\n");
fprintf(os, "Use - for INPUT to read from standard input.\n\n");
fprintf(os, " -h Display this help and exit\n");
fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples')\n");
fprintf(os, " -o SYNTAX Output syntax (`turtle' or `ntriples')\n");
fprintf(os, " -s INPUT Parse INPUT as string (terminates options)\n");
fprintf(os, " -p PREFIX Add PREFIX to blank node IDs\n");
fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs\n");
fprintf(os, " -v Display version information and exit\n");
return error ? 1 : 0;
}
@ -58,6 +61,20 @@ file_sink(const void* buf, size_t len, void* stream) @@ -58,6 +61,20 @@ file_sink(const void* buf, size_t len, void* stream)
return fwrite(buf, 1, len, file);
}
/**
   Parse a syntax name given on the command line.

   @param syntax Set to SERD_TURTLE for "turtle" or SERD_NTRIPLES for
   "ntriples"; left untouched for any other name.
   @param name Syntax name to parse.
   @return true if @a name was recognised, otherwise false after printing a
   message to stderr.
*/
bool
set_syntax(SerdSyntax* syntax, const char* name)
{
	if (!strcmp(name, "turtle")) {
		*syntax = SERD_TURTLE;
		return true;
	}
	if (!strcmp(name, "ntriples")) {
		*syntax = SERD_NTRIPLES;
		return true;
	}

	// Used for both the -i and -o options, so the message must not say "input"
	fprintf(stderr, "Unknown syntax `%s'\n", name);
	return false;
}
int
main(int argc, char** argv)
{
@ -65,14 +82,17 @@ main(int argc, char** argv) @@ -65,14 +82,17 @@ main(int argc, char** argv)
return print_usage(argv[0], true);
}
FILE* in_fd = NULL;
SerdSyntax output_syntax = SERD_NTRIPLES;
bool from_file = true;
const char* in_name = NULL;
FILE* in_fd = NULL;
SerdSyntax input_syntax = SERD_TURTLE;
SerdSyntax output_syntax = SERD_NTRIPLES;
bool from_file = true;
const uint8_t* in_name = NULL;
const uint8_t* add_prefix = NULL;
const uint8_t* chop_prefix = NULL;
int a = 1;
for (; a < argc && argv[a][0] == '-'; ++a) {
if (argv[a][1] == '\0') {
in_name = "(stdin)";
in_name = (const uint8_t*)"(stdin)";
in_fd = stdin;
break;
} else if (argv[a][1] == 'h') {
@ -80,23 +100,38 @@ main(int argc, char** argv) @@ -80,23 +100,38 @@ main(int argc, char** argv)
} else if (argv[a][1] == 'v') {
return print_version();
} else if (argv[a][1] == 's') {
in_name = "(string)";
in_name = (const uint8_t*)"(string)";
from_file = false;
++a;
break;
} else if (argv[a][1] == 'i') {
if (++a == argc) {
fprintf(stderr, "Missing value for -i\n");
return 1;
}
if (!set_syntax(&input_syntax, argv[a])) {
return 1;
}
} else if (argv[a][1] == 'o') {
if (++a == argc) {
fprintf(stderr, "Missing value for -o\n");
return 1;
}
if (!strcmp(argv[a], "turtle")) {
output_syntax = SERD_TURTLE;
} else if (!strcmp(argv[a], "ntriples")) {
output_syntax = SERD_NTRIPLES;
} else {
fprintf(stderr, "Unknown output format `%s'\n", argv[a]);
if (!set_syntax(&output_syntax, argv[a])) {
return 1;
}
} else if (argv[a][1] == 'p') {
if (++a == argc) {
fprintf(stderr, "Missing value for -p\n");
return 1;
}
add_prefix = (const uint8_t*)argv[a];
} else if (argv[a][1] == 'c') {
if (++a == argc) {
fprintf(stderr, "Missing value for -c\n");
return 1;
}
chop_prefix = (const uint8_t*)argv[a];
} else {
fprintf(stderr, "Unknown option `%s'\n", argv[a]);
return print_usage(argv[0], true);
@ -105,7 +140,7 @@ main(int argc, char** argv) @@ -105,7 +140,7 @@ main(int argc, char** argv)
const uint8_t* input = (const uint8_t*)argv[a++];
if (from_file) {
in_name = in_name ? in_name : (const char*)input;
in_name = in_name ? in_name : input;
if (!in_fd) {
if (serd_uri_string_has_scheme(input)) {
// INPUT is an absolute URI, ensure it is a file and chop scheme
@ -132,27 +167,25 @@ main(int argc, char** argv) @@ -132,27 +167,25 @@ main(int argc, char** argv)
}
const uint8_t* base_uri_str = NULL;
SerdURI base_uri;
if (a < argc) { // Base URI given on command line
const uint8_t* const in_base_uri = (const uint8_t*)argv[a];
if (serd_uri_parse((const uint8_t*)in_base_uri, &base_uri)) {
fprintf(stderr, "Invalid base URI <%s>\n", argv[2]);
return 1;
}
base_uri_str = in_base_uri;
base_uri_str = (const uint8_t*)argv[a];
} else if (from_file) { // Use input file URI
base_uri_str = input;
} else {
base_uri_str = (const uint8_t*)"";
}
if (serd_uri_parse(base_uri_str, &base_uri)) {
SerdURI base_uri = SERD_URI_NULL;
SerdNode base_uri_node = serd_node_new_uri_from_string(
base_uri_str, &base_uri, &base_uri);
if (!base_uri_node.buf) {
fprintf(stderr, "Invalid base URI <%s>\n", base_uri_str);
return 1;
}
FILE* out_fd = stdout;
SerdEnv* env = serd_env_new();
SerdEnv* env = serd_env_new(&base_uri_node);
SerdStyle output_style = SERD_STYLE_RESOLVED;
if (output_syntax == SERD_NTRIPLES) {
@ -161,24 +194,28 @@ main(int argc, char** argv) @@ -161,24 +194,28 @@ main(int argc, char** argv)
output_style |= SERD_STYLE_ABBREVIATED;
}
SerdNode base_uri_node = serd_node_from_string(SERD_URI, base_uri_str);
serd_env_set_base_uri(env, &base_uri_node);
serd_env_get_base_uri(env, &base_uri);
SerdWriter* writer = serd_writer_new(
output_syntax, output_style, env, &base_uri, file_sink, out_fd);
if (chop_prefix) {
serd_writer_chop_blank_prefix(writer, chop_prefix);
}
State state = { env, writer };
SerdReader* reader = serd_reader_new(
SERD_TURTLE, state.writer,
input_syntax, state.writer, NULL,
(SerdBaseSink)serd_writer_set_base_uri,
(SerdPrefixSink)serd_writer_set_prefix,
(SerdStatementSink)serd_writer_write_statement,
(SerdEndSink)serd_writer_end_anon);
if (add_prefix) {
serd_reader_add_blank_prefix(reader, add_prefix);
}
const SerdStatus status = (from_file)
? serd_reader_read_file(reader, in_fd, (const uint8_t*)in_name)
? serd_reader_read_file_handle(reader, in_fd, in_name)
: serd_reader_read_string(reader, input);
serd_reader_free(reader);
@ -190,6 +227,7 @@ main(int argc, char** argv) @@ -190,6 +227,7 @@ main(int argc, char** argv)
serd_writer_finish(state.writer);
serd_writer_free(state.writer);
serd_env_free(state.env);
serd_node_free(&base_uri_node);
return (status == SERD_SUCCESS) ? 0 : 1;
}

52
src/writer.c

@ -44,6 +44,8 @@ struct SerdWriterImpl { @@ -44,6 +44,8 @@ struct SerdWriterImpl {
SerdSink sink;
void* stream;
WriteContext context;
uint8_t* bprefix;
size_t bprefix_len;
unsigned indent;
bool empty;
};
@ -214,7 +216,15 @@ write_node(SerdWriter* writer, @@ -214,7 +216,15 @@ write_node(SerdWriter* writer,
} // else fall through
case SERD_BLANK_ID:
writer->sink("_:", 2, writer->stream);
writer->sink(node->buf, node->n_bytes, writer->stream);
if (writer->bprefix
&& !strncmp((const char*)node->buf, (const char*)writer->bprefix,
writer->bprefix_len)) {
writer->sink(node->buf + writer->bprefix_len,
node->n_bytes - writer->bprefix_len,
writer->stream);
} else {
writer->sink(node->buf, node->n_bytes, writer->stream);
}
break;
case SERD_CURIE:
switch (writer->syntax) {
@ -445,19 +455,38 @@ serd_writer_new(SerdSyntax syntax, @@ -445,19 +455,38 @@ serd_writer_new(SerdSyntax syntax,
{
const WriteContext context = WRITE_CONTEXT_NULL;
SerdWriter* writer = malloc(sizeof(struct SerdWriterImpl));
writer->syntax = syntax;
writer->style = style;
writer->env = env;
writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL;
writer->anon_stack = serd_stack_new(sizeof(WriteContext));
writer->sink = sink;
writer->stream = stream;
writer->context = context;
writer->indent = 0;
writer->empty = true;
writer->syntax = syntax;
writer->style = style;
writer->env = env;
writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL;
writer->anon_stack = serd_stack_new(sizeof(WriteContext));
writer->sink = sink;
writer->stream = stream;
writer->context = context;
writer->bprefix = NULL;
writer->bprefix_len = 0;
writer->indent = 0;
writer->empty = true;
return writer;
}
/**
   Set, replace or clear the blank node prefix stored in @a writer.

   Any previously stored prefix is freed. If @a prefix is non-NULL, a private
   copy of it (including the terminator) is stored along with its length;
   passing NULL leaves the writer with no prefix. If the copy cannot be
   allocated the writer is likewise left with no prefix.

   @param writer Writer to update.
   @param prefix NUL-terminated prefix to store, or NULL to clear.
*/
SERD_API
void
serd_writer_chop_blank_prefix(SerdWriter*    writer,
                              const uint8_t* prefix)
{
	// Drop the old prefix first; free(NULL) is a no-op so no guard is needed
	free(writer->bprefix);
	writer->bprefix     = NULL;
	writer->bprefix_len = 0;

	if (!prefix) {
		return;
	}

	const size_t len  = strlen((const char*)prefix);
	uint8_t*     copy = malloc(len + 1);
	if (!copy) {
		// Out of memory: avoid copying into NULL, leave the prefix cleared
		return;
	}

	memcpy(copy, prefix, len + 1);
	writer->bprefix     = copy;
	writer->bprefix_len = len;
}
SERD_API
SerdStatus
serd_writer_set_base_uri(SerdWriter* writer,
@ -512,5 +541,6 @@ serd_writer_free(SerdWriter* writer) @@ -512,5 +541,6 @@ serd_writer_free(SerdWriter* writer)
SerdWriter* const me = (SerdWriter*)writer;
serd_writer_finish(me);
serd_stack_free(&writer->anon_stack);
free(writer->bprefix);
free(me);
}

2
tests/test-id.out

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
<http://example.org/base#c> <http://example.org/base#d> _:genid1 .
_:docid1 <http://example.org/base#a> <http://example.org/base#b> .

4
tests/test-id.ttl

@ -0,0 +1,4 @@ @@ -0,0 +1,4 @@
@prefix : <http://example.org/base#> .
:c :d [] .
_:genid1 :a :b .

17
wscript

@ -9,7 +9,7 @@ from waflib.extras import autowaf as autowaf @@ -9,7 +9,7 @@ from waflib.extras import autowaf as autowaf
import waflib.Logs as Logs, waflib.Options as Options
# Version of this package (even if built as a child)
SERD_VERSION = '0.2.0'
SERD_VERSION = '0.3.0'
SERD_MAJOR_VERSION = '0'
# Library version (UNIX style major, minor, micro)
@ -245,18 +245,21 @@ def test(ctx): @@ -245,18 +245,21 @@ def test(ctx):
autowaf.run_tests(ctx, APPNAME, commands, 1, name='bad')
thru_tests = good_tests
thru_tests.remove('tests/test-id.ttl') # IDs are mapped so files won't be identical
commands = []
for test in good_tests:
for test in thru_tests:
base_uri = 'http://www.w3.org/2001/sw/DataAccess/df1/' + test
out_filename = test + '.thru'
commands += [
'%s -o turtle %s/%s \'%s\' | %s - \'%s\' > %s.thru' % (
'./serdi_static', srcdir, test, base_uri,
'./serdi_static', base_uri, test) ]
'%s -o turtle %s/%s \'%s\' | %s -i turtle - \'%s\' | sed \'s/_:docid/_:genid/g\' > %s.thru' % (
'./serdi_static', srcdir, test, base_uri,
'./serdi_static', base_uri, test) ]
autowaf.run_tests(ctx, APPNAME, commands, 0, name='turtle-round-trip')
Logs.pprint('BOLD', '\nVerifying ntriples => turtle => ntriples')
for test in good_tests:
for test in thru_tests:
out_filename = test + '.thru'
if not os.access(out_filename, os.F_OK):
Logs.pprint('RED', 'FAIL: %s output is missing' % test)

Loading…
Cancel
Save