Browse Source

Add incremental read interface suitable for reading from infinite streams.

git-svn-id: http://svn.drobilla.net/serd/trunk@350 490d8e77-9747-427b-9fa3-0b8f29cee8a0
zrythm_meson
David Robillard 11 years ago
parent
commit
4de86fdfb6
  1. 7
      NEWS
  2. 10
      doc/serdi.1
  3. 34
      serd/serd.h
  4. 78
      src/reader.c
  5. 19
      src/serdi.c
  6. 4
      wscript

7
NEWS

@ -1,3 +1,10 @@ @@ -1,3 +1,10 @@
serd (9999) unstable;
* Add incremental read interface suitable for reading from infinite streams.
* Add -e option to serdi to use incremental reading.
-- David Robillard <d@drobilla.net>
serd (0.14.0) stable;
* Use path variables in pkgconfig files

10
doc/serdi.1

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
.TH SERDI 1 "17 Jan 2012"
.TH SERDI 1 "08 May 2012"
.SH NAME
.B serdi \- Read and write RDF syntax
@ -16,6 +16,14 @@ Fast bulk output for large serialisations. @@ -16,6 +16,14 @@ Fast bulk output for large serialisations.
\fB\-c PREFIX\fR
Chop PREFIX from matching blank node IDs.
.TP
\fB\-e\fR
Eat input one character at a time, rather than a page at a time which is the
default. This is useful when reading from a pipe since output will be
generated immediately as input arrives, rather than waiting until an entire
page of input has arrived. With this option serdi uses one page less memory,
but will likely be significantly slower.
.TP
\fB\-f\fR
Keep full URIs in input (don't qualify).

34
serd/serd.h

@ -704,6 +704,40 @@ SerdStatus @@ -704,6 +704,40 @@ SerdStatus
serd_reader_read_file(SerdReader* reader,
const uint8_t* uri);
/**
Start an incremental read from a file handle.
Iff @p bulk is true, @p file will be read a page at a time. This is more
efficient, but uses a page of memory and means that an entire page of input
must be ready before any callbacks will fire. To react as soon as input
arrives, set @p bulk to false.
When @p bulk is false, no input is read by this call, so it will not block
waiting for data.
@param me Reader to start the stream on.
@param file Open file handle to read from. It is not closed by the reader.
@param name Name stored in the reader's cursor for this input (presumably
used when reporting positions; confirm against the error-reporting code).
@param bulk True to read a page at a time, false to read a byte at a time.
@return SERD_SUCCESS, or an error status if @p bulk is true and reading the
first page fails.
*/
SERD_API
SerdStatus
serd_reader_start_stream(SerdReader* me,
FILE* file,
const uint8_t* name,
bool bulk);
/**
Read a single "chunk" of data during an incremental read.
This function will read a single top level description, and return. This
may be a directive, statement, or several statements; essentially it reads
until a '.' is encountered. This is particularly useful for reading
directly from a pipe or socket.
Must be called between serd_reader_start_stream() and
serd_reader_end_stream().
@return SERD_SUCCESS if a chunk was read, or SERD_FAILURE if the end of
input was reached or the chunk could not be read.
*/
SERD_API
SerdStatus
serd_reader_read_chunk(SerdReader* me);
/**
Finish an incremental read from a file handle.
This releases the page buffer if the stream was started in bulk mode, and
detaches the reader from the file handle. The file handle itself is not
closed; that remains the caller's responsibility.
@return SERD_SUCCESS.
*/
SERD_API
SerdStatus
serd_reader_end_stream(SerdReader* me);
/**
Read @c file.
*/

78
src/reader.c

@ -78,7 +78,9 @@ struct SerdReaderImpl { @@ -78,7 +78,9 @@ struct SerdReaderImpl {
unsigned next_id;
uint8_t* read_buf;
int32_t read_head; ///< Offset into read_buf
uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
bool from_file; ///< True iff reading from @ref fd
bool paging; ///< True iff reading a page at a time
bool eof;
bool seen_genid;
#ifdef SERD_STACK_CHECK
@ -124,14 +126,19 @@ static inline uint8_t @@ -124,14 +126,19 @@ static inline uint8_t
eat_byte_safe(SerdReader* reader, const uint8_t byte)
{
assert(peek_byte(reader) == byte);
++reader->read_head;
switch (byte) {
case '\0': reader->eof = true; break;
case '\n': ++reader->cur.line; reader->cur.col = 0; break;
default: ++reader->cur.col;
}
if (reader->from_file && (reader->read_head == SERD_PAGE_SIZE)) {
if (reader->from_file && !reader->paging) {
const int c = fgetc(reader->fd);
reader->read_byte = (c == EOF) ? 0 : (uint8_t)c;
if (c == EOF) {
reader->eof = true;
}
} else if (++reader->read_head == SERD_PAGE_SIZE && reader->paging) {
page(reader);
}
return byte;
@ -548,7 +555,7 @@ read_comment(SerdReader* reader) @@ -548,7 +555,7 @@ read_comment(SerdReader* reader)
{
eat_byte_safe(reader, '#');
uint8_t c;
while (((c = peek_byte(reader)) != 0xA) && (c != 0xD)) {
while (((c = peek_byte(reader)) != 0xA) && (c != 0xD) && c) {
eat_byte_safe(reader, c);
}
}
@ -1458,14 +1465,17 @@ static void @@ -1458,14 +1465,17 @@ static void
skip_bom(SerdReader* me)
{
const uint8_t* const b = me->read_buf;
if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
if (me->paging && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
me->read_head += 3;
}
}
SERD_API
SerdStatus
serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
serd_reader_start_stream(SerdReader* me,
FILE* file,
const uint8_t* name,
bool bulk)
{
const Cursor cur = { name, 1, 1 };
me->fd = file;
@ -1473,19 +1483,62 @@ serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name) @@ -1473,19 +1483,62 @@ serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
me->cur = cur;
me->from_file = true;
me->eof = false;
me->read_buf = (uint8_t*)serd_bufalloc(SERD_PAGE_SIZE);
me->paging = bulk;
if (bulk) {
me->read_buf = (uint8_t*)serd_bufalloc(SERD_PAGE_SIZE);
memset(me->read_buf, '\0', SERD_PAGE_SIZE);
SerdStatus st = page(me);
if (st) {
serd_reader_end_stream(me);
return st;
}
skip_bom(me);
} else {
me->read_buf = &me->read_byte;
me->read_byte = 0; // Don't read to avoid potentially blocking
}
memset(me->read_buf, '\0', SERD_PAGE_SIZE);
return SERD_SUCCESS;
}
SerdStatus st = page(me);
if (!st) {
skip_bom(me);
st = read_turtleDoc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN;
/**
   Read one top-level statement from the stream opened by
   serd_reader_start_stream().

   In byte-at-a-time mode the 1-byte buffer is primed lazily here rather than
   when the stream is started, so that starting a stream never blocks.  In
   paging mode the current byte lives in the page buffer, so the file must not
   be touched here: an extra fgetc() would consume a byte behind the pager's
   back and that byte would be lost from the input.

   @return SERD_SUCCESS if a statement was read, or SERD_FAILURE at end of
   input or if read_statement() fails.
*/
SERD_API
SerdStatus
serd_reader_read_chunk(SerdReader* me)
{
	if (!me->paging && !me->read_byte) {
		// Prime the 1-byte buffer with the next byte of input
		const int c = fgetc(me->fd);
		if (c == EOF) {
			me->read_byte = 0;
			me->eof       = true;
			return SERD_FAILURE;
		}
		me->read_byte = (uint8_t)c;
	}
	return read_statement(me) ? SERD_SUCCESS : SERD_FAILURE;
}
free(me->read_buf);
/**
   Finish a stream started with serd_reader_start_stream().

   Only a paging stream owns a heap-allocated read buffer; otherwise read_buf
   points at the reader's own 1-byte buffer and must not be freed.  The file
   handle is detached but not closed.

   @return SERD_SUCCESS.
*/
SERD_API
SerdStatus
serd_reader_end_stream(SerdReader* me)
{
	// Non-NULL only when the buffer is a page this reader allocated
	uint8_t* const owned_page = me->paging ? me->read_buf : NULL;

	me->read_buf = NULL;
	me->fd       = NULL;
	free(owned_page);  // free(NULL) is a no-op
	return SERD_SUCCESS;
}
/**
   Read an entire document from @p file in one call.

   Convenience wrapper around the stream interface: starts a paging (bulk)
   stream, reads a whole Turtle document from it, then ends the stream.

   @return The status from serd_reader_start_stream() if starting fails,
   otherwise SERD_SUCCESS if read_turtleDoc() succeeds or SERD_ERR_UNKNOWN if
   it does not.
*/
SERD_API
SerdStatus
serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name)
{
	const SerdStatus start_status =
		serd_reader_start_stream(me, file, name, true);
	if (start_status) {
		return start_status;  // Stream never started, so there is nothing to end
	}

	SerdStatus result = SERD_ERR_UNKNOWN;
	if (read_turtleDoc(me)) {
		result = SERD_SUCCESS;
	}

	serd_reader_end_stream(me);
	return result;
}
@ -1499,6 +1552,7 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8) @@ -1499,6 +1552,7 @@ serd_reader_read_string(SerdReader* me, const uint8_t* utf8)
me->read_head = 0;
me->cur = cur;
me->from_file = false;
me->paging = false;
me->eof = false;
skip_bom(me);

19
src/serdi.c

@ -46,6 +46,7 @@ print_usage(const char* name, bool error) @@ -46,6 +46,7 @@ print_usage(const char* name, bool error)
fprintf(os, "Use - for INPUT to read from standard input.\n\n");
fprintf(os, " -b Fast bulk output for large serialisations.\n");
fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n");
fprintf(os, " -e Eat input one character at a time.\n");
fprintf(os, " -f Keep full URIs in input (don't qualify).\n");
fprintf(os, " -h Display this help and exit.\n");
fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples').\n");
@ -89,6 +90,7 @@ main(int argc, char** argv) @@ -89,6 +90,7 @@ main(int argc, char** argv)
SerdSyntax input_syntax = SERD_TURTLE;
SerdSyntax output_syntax = SERD_NTRIPLES;
bool from_file = true;
bool bulk_read = true;
bool bulk_write = false;
bool full_uris = false;
const uint8_t* in_name = NULL;
@ -103,6 +105,8 @@ main(int argc, char** argv) @@ -103,6 +105,8 @@ main(int argc, char** argv)
break;
} else if (argv[a][1] == 'b') {
bulk_write = true;
} else if (argv[a][1] == 'e') {
bulk_read = false;
} else if (argv[a][1] == 'f') {
full_uris = true;
} else if (argv[a][1] == 'h') {
@ -206,9 +210,18 @@ main(int argc, char** argv) @@ -206,9 +210,18 @@ main(int argc, char** argv)
serd_writer_chop_blank_prefix(writer, chop_prefix);
serd_reader_add_blank_prefix(reader, add_prefix);
const SerdStatus status = (from_file)
? serd_reader_read_file_handle(reader, in_fd, in_name)
: serd_reader_read_string(reader, input);
SerdStatus status = SERD_SUCCESS;
if (!from_file) {
status = serd_reader_read_string(reader, input);
} else if (bulk_read) {
status = serd_reader_read_file_handle(reader, in_fd, in_name);
} else {
status = serd_reader_start_stream(reader, in_fd, in_name, false);
while (!status) {
status = serd_reader_read_chunk(reader);
}
serd_reader_end_stream(reader);
}
serd_reader_free(reader);

4
wscript

@ -9,7 +9,7 @@ from waflib.extras import autowaf as autowaf @@ -9,7 +9,7 @@ from waflib.extras import autowaf as autowaf
import waflib.Logs as Logs, waflib.Options as Options
# Version of this package (even if built as a child)
SERD_VERSION = '0.14.0'
SERD_VERSION = '0.15.0'
SERD_MAJOR_VERSION = '0'
# Library version (UNIX style major, minor, micro)
@ -389,6 +389,8 @@ def test(ctx): @@ -389,6 +389,8 @@ def test(ctx):
flags += ' -f'
if (num % 3 == 0):
flags += ' -r http://www.w3.org/'
if (num % 7 == 0):
flags += ' -e'
base_uri = 'http://www.w3.org/2001/sw/DataAccess/df1/' + test.replace('\\', '/')
out_filename = test + '.thru'
commands += [

Loading…
Cancel
Save