|
|
|
@ -17,16 +17,8 @@
@@ -17,16 +17,8 @@
|
|
|
|
|
#ifndef SERD_INTERNAL_H |
|
|
|
|
#define SERD_INTERNAL_H |
|
|
|
|
|
|
|
|
|
#define _POSIX_C_SOURCE 200809L /* for posix_memalign and posix_fadvise */ |
|
|
|
|
|
|
|
|
|
#include "serd_config.h" |
|
|
|
|
|
|
|
|
|
#include "serd/serd.h" |
|
|
|
|
|
|
|
|
|
#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO) |
|
|
|
|
# include <fcntl.h> |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#include <assert.h> |
|
|
|
|
#include <ctype.h> |
|
|
|
|
#include <errno.h> |
|
|
|
@ -43,499 +35,6 @@
@@ -43,499 +35,6 @@
|
|
|
|
|
# define MIN(a, b) (((a) < (b)) ? (a) : (b)) |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if defined(__GNUC__) |
|
|
|
|
# define SERD_LOG_FUNC(fmt, arg1) __attribute__((format(printf, fmt, arg1))) |
|
|
|
|
#else |
|
|
|
|
# define SERD_LOG_FUNC(fmt, arg1) |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
static const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD }; |
|
|
|
|
|
|
|
|
|
/* File and Buffer Utilities */ |
|
|
|
|
|
|
|
|
|
/**
   Open the file at `path` with `mode`, as fopen() would.

   If the file can not be opened, a message containing the path and
   strerror(errno) is printed to stderr and NULL is returned.  When both
   HAVE_POSIX_FADVISE and HAVE_FILENO are defined, posix_fadvise() is called
   on the new stream with POSIX_FADV_SEQUENTIAL; its result is ignored.
*/
static inline FILE*
serd_fopen(const char* path, const char* mode)
{
    FILE* const stream = fopen(path, mode);
    if (stream) {
#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO)
        posix_fadvise(fileno(stream), 0, 0, POSIX_FADV_SEQUENTIAL);
#endif
        return stream;
    }

    fprintf(stderr, "error: failed to open file %s (%s)\n",
            path, strerror(errno));
    return NULL;
}
|
|
|
|
|
|
|
|
|
/**
   Allocate a buffer of `size` bytes.

   With HAVE_POSIX_MEMALIGN the buffer is aligned to SERD_PAGE_SIZE via
   posix_memalign(), otherwise plain malloc() is used.  Returns NULL if the
   allocation fails.
*/
static inline void*
serd_bufalloc(size_t size)
{
#ifdef HAVE_POSIX_MEMALIGN
    void* mem = NULL;
    if (posix_memalign(&mem, SERD_PAGE_SIZE, size) != 0) {
        return NULL;
    }
    return mem;
#else
    return malloc(size);
#endif
}
|
|
|
|
|
|
|
|
|
/* Byte source */ |
|
|
|
|
|
|
|
|
|
/** A position in an input, used for error reporting. */
typedef struct {
    const uint8_t* filename;  ///< Name associated with the input
    unsigned       line;      ///< Line counter
    unsigned       col;       ///< Column counter within the current line
} Cursor;
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
|
|
SerdSource read_func; ///< Read function (e.g. fread)
|
|
|
|
|
SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror)
|
|
|
|
|
void* stream; ///< Stream (e.g. FILE)
|
|
|
|
|
size_t page_size; ///< Number of bytes to read at a time
|
|
|
|
|
size_t buf_size; ///< Number of bytes in file_buf
|
|
|
|
|
Cursor cur; ///< Cursor for error reporting
|
|
|
|
|
uint8_t* file_buf; ///< Buffer iff reading pages from a file
|
|
|
|
|
const uint8_t* read_buf; ///< Pointer to file_buf or read_byte
|
|
|
|
|
size_t read_head; ///< Offset into read_buf
|
|
|
|
|
uint8_t read_byte; ///< 1-byte 'buffer' used when not paging
|
|
|
|
|
bool from_stream; ///< True iff reading from `stream`
|
|
|
|
|
bool prepared; ///< True iff prepared for reading
|
|
|
|
|
bool eof; ///< True iff end of file reached
|
|
|
|
|
} SerdByteSource; |
|
|
|
|
|
|
|
|
|
SerdStatus |
|
|
|
|
serd_byte_source_open_file(SerdByteSource* source, |
|
|
|
|
FILE* file, |
|
|
|
|
bool bulk); |
|
|
|
|
|
|
|
|
|
SerdStatus |
|
|
|
|
serd_byte_source_open_string(SerdByteSource* source, const uint8_t* utf8); |
|
|
|
|
|
|
|
|
|
SerdStatus |
|
|
|
|
serd_byte_source_open_source(SerdByteSource* source, |
|
|
|
|
SerdSource read_func, |
|
|
|
|
SerdStreamErrorFunc error_func, |
|
|
|
|
void* stream, |
|
|
|
|
const uint8_t* name, |
|
|
|
|
size_t page_size); |
|
|
|
|
|
|
|
|
|
SerdStatus |
|
|
|
|
serd_byte_source_close(SerdByteSource* source); |
|
|
|
|
|
|
|
|
|
SerdStatus |
|
|
|
|
serd_byte_source_prepare(SerdByteSource* source); |
|
|
|
|
|
|
|
|
|
SerdStatus |
|
|
|
|
serd_byte_source_page(SerdByteSource* source); |
|
|
|
|
|
|
|
|
|
static inline uint8_t |
|
|
|
|
serd_byte_source_peek(SerdByteSource* source) |
|
|
|
|
{ |
|
|
|
|
assert(source->prepared); |
|
|
|
|
return source->read_buf[source->read_head]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline SerdStatus |
|
|
|
|
serd_byte_source_advance(SerdByteSource* source) |
|
|
|
|
{ |
|
|
|
|
SerdStatus st = SERD_SUCCESS; |
|
|
|
|
|
|
|
|
|
switch (serd_byte_source_peek(source)) { |
|
|
|
|
case '\n': ++source->cur.line; source->cur.col = 0; break; |
|
|
|
|
default: ++source->cur.col; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
const bool was_eof = source->eof; |
|
|
|
|
if (source->from_stream) { |
|
|
|
|
source->eof = false; |
|
|
|
|
if (source->page_size > 1) { |
|
|
|
|
if (++source->read_head == source->page_size) { |
|
|
|
|
st = serd_byte_source_page(source); |
|
|
|
|
} else if (source->read_head == source->buf_size) { |
|
|
|
|
source->eof = true; |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
if (!source->read_func(&source->read_byte, 1, 1, source->stream)) { |
|
|
|
|
source->eof = true; |
|
|
|
|
st = source->error_func(source->stream) ? SERD_ERR_UNKNOWN |
|
|
|
|
: SERD_FAILURE; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} else if (!source->eof) { |
|
|
|
|
++source->read_head; // Move to next character in string
|
|
|
|
|
if (source->read_buf[source->read_head] == '\0') { |
|
|
|
|
source->eof = true; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return (was_eof && source->eof) ? SERD_FAILURE : st; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* Stack */ |
|
|
|
|
|
|
|
|
|
/** A dynamic stack in memory. */
typedef struct {
    /// Stack memory
    uint8_t* buf;

    /// Allocated size of buf (>= size)
    size_t buf_size;

    /// Conceptual size of stack in buf
    size_t size;
} SerdStack;
|
|
|
|
|
|
|
|
|
/** An offset to start the stack at. Note 0 is reserved for NULL. */ |
|
|
|
|
#define SERD_STACK_BOTTOM sizeof(void*) |
|
|
|
|
|
|
|
|
|
static inline SerdStack |
|
|
|
|
serd_stack_new(size_t size) |
|
|
|
|
{ |
|
|
|
|
SerdStack stack; |
|
|
|
|
stack.buf = (uint8_t*)calloc(size, 1); |
|
|
|
|
stack.buf_size = size; |
|
|
|
|
stack.size = SERD_STACK_BOTTOM; |
|
|
|
|
return stack; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline bool |
|
|
|
|
serd_stack_is_empty(SerdStack* stack) |
|
|
|
|
{ |
|
|
|
|
return stack->size <= SERD_STACK_BOTTOM; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline void |
|
|
|
|
serd_stack_free(SerdStack* stack) |
|
|
|
|
{ |
|
|
|
|
free(stack->buf); |
|
|
|
|
stack->buf = NULL; |
|
|
|
|
stack->buf_size = 0; |
|
|
|
|
stack->size = 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline uint8_t* |
|
|
|
|
serd_stack_push(SerdStack* stack, size_t n_bytes) |
|
|
|
|
{ |
|
|
|
|
const size_t new_size = stack->size + n_bytes; |
|
|
|
|
if (stack->buf_size < new_size) { |
|
|
|
|
stack->buf_size += (stack->buf_size >> 1); // *= 1.5
|
|
|
|
|
stack->buf = (uint8_t*)realloc(stack->buf, stack->buf_size); |
|
|
|
|
} |
|
|
|
|
uint8_t* const ret = (stack->buf + stack->size); |
|
|
|
|
stack->size = new_size; |
|
|
|
|
return ret; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline void |
|
|
|
|
serd_stack_pop(SerdStack* stack, size_t n_bytes) |
|
|
|
|
{ |
|
|
|
|
assert(stack->size >= n_bytes); |
|
|
|
|
stack->size -= n_bytes; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline void* |
|
|
|
|
serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align) |
|
|
|
|
{ |
|
|
|
|
// Push one byte to ensure space for a pad count
|
|
|
|
|
serd_stack_push(stack, 1); |
|
|
|
|
|
|
|
|
|
// Push padding if necessary
|
|
|
|
|
const size_t pad = align - stack->size % align; |
|
|
|
|
if (pad > 0) { |
|
|
|
|
serd_stack_push(stack, pad); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Set top of stack to pad count so we can properly pop later
|
|
|
|
|
assert(pad < UINT8_MAX); |
|
|
|
|
stack->buf[stack->size - 1] = (uint8_t)pad; |
|
|
|
|
|
|
|
|
|
// Push requested space at aligned location
|
|
|
|
|
return serd_stack_push(stack, n_bytes); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline void |
|
|
|
|
serd_stack_pop_aligned(SerdStack* stack, size_t n_bytes) |
|
|
|
|
{ |
|
|
|
|
// Pop requested space down to aligned location
|
|
|
|
|
serd_stack_pop(stack, n_bytes); |
|
|
|
|
|
|
|
|
|
// Get amount of padding from top of stack
|
|
|
|
|
const uint8_t pad = stack->buf[stack->size - 1]; |
|
|
|
|
|
|
|
|
|
// Pop padding and pad count
|
|
|
|
|
serd_stack_pop(stack, pad + 1u); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* Byte Sink */ |
|
|
|
|
|
|
|
|
|
typedef struct SerdByteSinkImpl { |
|
|
|
|
SerdSink sink; |
|
|
|
|
void* stream; |
|
|
|
|
uint8_t* buf; |
|
|
|
|
size_t size; |
|
|
|
|
size_t block_size; |
|
|
|
|
} SerdByteSink; |
|
|
|
|
|
|
|
|
|
static inline SerdByteSink |
|
|
|
|
serd_byte_sink_new(SerdSink sink, void* stream, size_t block_size) |
|
|
|
|
{ |
|
|
|
|
SerdByteSink bsink; |
|
|
|
|
bsink.sink = sink; |
|
|
|
|
bsink.stream = stream; |
|
|
|
|
bsink.size = 0; |
|
|
|
|
bsink.block_size = block_size; |
|
|
|
|
bsink.buf = ((block_size > 1) |
|
|
|
|
? (uint8_t*)serd_bufalloc(block_size) |
|
|
|
|
: NULL); |
|
|
|
|
return bsink; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline void |
|
|
|
|
serd_byte_sink_flush(SerdByteSink* bsink) |
|
|
|
|
{ |
|
|
|
|
if (bsink->block_size > 1 && bsink->size > 0) { |
|
|
|
|
bsink->sink(bsink->buf, bsink->size, bsink->stream); |
|
|
|
|
bsink->size = 0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline void |
|
|
|
|
serd_byte_sink_free(SerdByteSink* bsink) |
|
|
|
|
{ |
|
|
|
|
serd_byte_sink_flush(bsink); |
|
|
|
|
free(bsink->buf); |
|
|
|
|
bsink->buf = NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline size_t |
|
|
|
|
serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink) |
|
|
|
|
{ |
|
|
|
|
if (len == 0) { |
|
|
|
|
return 0; |
|
|
|
|
} else if (bsink->block_size == 1) { |
|
|
|
|
return bsink->sink(buf, len, bsink->stream); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
const size_t orig_len = len; |
|
|
|
|
while (len) { |
|
|
|
|
const size_t space = bsink->block_size - bsink->size; |
|
|
|
|
const size_t n = MIN(space, len); |
|
|
|
|
|
|
|
|
|
// Write as much as possible into the remaining buffer space
|
|
|
|
|
memcpy(bsink->buf + bsink->size, buf, n); |
|
|
|
|
bsink->size += n; |
|
|
|
|
buf = (const uint8_t*)buf + n; |
|
|
|
|
len -= n; |
|
|
|
|
|
|
|
|
|
// Flush page if buffer is full
|
|
|
|
|
if (bsink->size == bsink->block_size) { |
|
|
|
|
bsink->sink(bsink->buf, bsink->block_size, bsink->stream); |
|
|
|
|
bsink->size = 0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return orig_len; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* Character utilities */ |
|
|
|
|
|
|
|
|
|
/** Return true if `c` lies within [`min`...`max`] (inclusive) */
static inline bool
in_range(const int c, const int min, const int max)
{
    return !(c < min || c > max);
}
|
|
|
|
|
|
|
|
|
/** RFC2234: ALPHA ::= %x41-5A / %x61-7A  ; A-Z / a-z */
static inline bool
is_alpha(const int c)
{
    const bool upper = (c >= 'A' && c <= 'Z');
    const bool lower = (c >= 'a' && c <= 'z');
    return upper || lower;
}
|
|
|
|
|
|
|
|
|
/** RFC2234: DIGIT ::= %x30-39  ; 0-9 */
static inline bool
is_digit(const int c)
{
    return c >= '0' && c <= '9';
}
|
|
|
|
|
|
|
|
|
/** RFC2234: HEXDIG ::= DIGIT / "A" / "B" / "C" / "D" / "E" / "F" (upper case only) */
static inline bool
is_hexdig(const int c)
{
    if (c >= '0' && c <= '9') {
        return true;
    }
    return c >= 'A' && c <= 'F';
}
|
|
|
|
|
|
|
|
|
/** Turtle / JSON / C: XDIGIT ::= DIGIT / A-F / a-f */
static inline bool
is_xdigit(const int c)
{
    const bool digit = (c >= '0' && c <= '9');
    const bool upper = (c >= 'A' && c <= 'F');
    const bool lower = (c >= 'a' && c <= 'f');
    return digit || upper || lower;
}
|
|
|
|
|
|
|
|
|
/** Return true iff `c` is a space, form feed, newline, carriage return, tab, or vertical tab */
static inline bool
is_space(const char c)
{
    return c == ' '  || c == '\f' || c == '\n'
        || c == '\r' || c == '\t' || c == '\v';
}
|
|
|
|
|
|
|
|
|
/** Return true iff `c` is a letter, a digit, or one of '+', '/', '=' */
static inline bool
is_base64(const uint8_t c)
{
    const bool alnum = (c >= 'A' && c <= 'Z')
                    || (c >= 'a' && c <= 'z')
                    || (c >= '0' && c <= '9');

    return alnum || c == '+' || c == '/' || c == '=';
}
|
|
|
|
|
|
|
|
|
/**
   Return true iff `path` starts like a Windows drive path: a letter, then
   ':' or '|', then '/' or '\'.

   Later bytes are only inspected if the earlier ones matched.
*/
static inline bool
is_windows_path(const uint8_t* path)
{
    const uint8_t drive = path[0];
    if (!((drive >= 'A' && drive <= 'Z') || (drive >= 'a' && drive <= 'z'))) {
        return false;
    }

    if (path[1] != ':' && path[1] != '|') {
        return false;
    }

    return path[2] == '/' || path[2] == '\\';
}
|
|
|
|
|
|
|
|
|
/* String utilities */ |
|
|
|
|
|
|
|
|
|
size_t |
|
|
|
|
serd_substrlen(const uint8_t* str, |
|
|
|
|
size_t len, |
|
|
|
|
size_t* n_bytes, |
|
|
|
|
SerdNodeFlags* flags); |
|
|
|
|
|
|
|
|
|
/**
   Compare at most `n` characters of `s1` and `s2`, ignoring case.

   Comparison stops at the end of `s2` or after `n` characters, whichever
   comes first, so `s2` being a prefix of `s1` counts as a match.

   Characters are converted to unsigned char before toupper(), as the C
   standard requires (passing a negative char is undefined).  The sign of the
   result is taken from the case-folded characters, so that e.g. "a" orders
   before "B".

   @return 0 on a match, -1 if `s1` orders before `s2`, otherwise +1.
*/
static inline int
serd_strncasecmp(const char* s1, const char* s2, size_t n)
{
    for (; n > 0 && *s2; ++s1, ++s2, --n) {
        const int c1 = toupper((unsigned char)*s1);
        const int c2 = toupper((unsigned char)*s2);
        if (c1 != c2) {
            return (c1 < c2) ? -1 : +1;
        }
    }

    return 0;
}
|
|
|
|
|
|
|
|
|
/**
   Return the length in bytes of the UTF-8 character introduced by lead byte
   `c` (1 to 4), or 0 if `c` is not a valid lead byte.
*/
static inline uint32_t
utf8_num_bytes(const uint8_t c)
{
    if (c < 0x80) {  // Starts with `0'
        return 1;
    }

    if ((c >> 5) == 0x06) {  // Starts with `110'
        return 2;
    }

    if ((c >> 4) == 0x0E) {  // Starts with `1110'
        return 3;
    }

    if ((c >> 3) == 0x1E) {  // Starts with `11110'
        return 4;
    }

    return 0;
}
|
|
|
|
|
|
|
|
|
/**
   Return the code point of a UTF-8 character with known length.

   The low (8 - size) bits of the lead byte are kept, then the low six bits of
   each of the following `size - 1` bytes are appended.  Continuation bytes
   are not validated.
*/
static inline uint32_t
parse_counted_utf8_char(const uint8_t* utf8, size_t size)
{
    const uint32_t lead_mask = (1u << (8 - size)) - 1;

    uint32_t code = utf8[0] & lead_mask;
    size_t   i    = 1;
    while (i < size) {
        const uint8_t payload = utf8[i] & 0x3F;
        code = (code << 6) | payload;
        ++i;
    }

    return code;
}
|
|
|
|
|
|
|
|
|
/**
   Parse a UTF-8 character, set *size to the length, and return the code point.

   If the first byte is not a valid lead byte, *size is set to 0 and 0 is
   returned.
*/
static inline uint32_t
parse_utf8_char(const uint8_t* utf8, size_t* size)
{
    const uint32_t n_bytes = utf8_num_bytes(utf8[0]);
    if (n_bytes >= 1 && n_bytes <= 4) {
        *size = n_bytes;
        return parse_counted_utf8_char(utf8, *size);
    }

    *size = 0;
    return 0;
}
|
|
|
|
|
|
|
|
|
/* URI utilities */ |
|
|
|
|
|
|
|
|
|
static inline bool |
|
|
|
|
chunk_equals(const SerdChunk* a, const SerdChunk* b) |
|
|
|
|
{ |
|
|
|
|
return a->len == b->len |
|
|
|
|
&& !strncmp((const char*)a->buf, (const char*)b->buf, a->len); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline size_t |
|
|
|
|
uri_path_len(const SerdURI* uri) |
|
|
|
|
{ |
|
|
|
|
return uri->path_base.len + uri->path.len; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline uint8_t |
|
|
|
|
uri_path_at(const SerdURI* uri, size_t i) |
|
|
|
|
{ |
|
|
|
|
if (i < uri->path_base.len) { |
|
|
|
|
return uri->path_base.buf[i]; |
|
|
|
|
} else { |
|
|
|
|
return uri->path.buf[i - uri->path_base.len]; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
Return the index of the first differing character after the last root slash, |
|
|
|
|
or zero if `uri` is not under `root`. |
|
|
|
|
*/ |
|
|
|
|
static inline size_t |
|
|
|
|
uri_rooted_index(const SerdURI* uri, const SerdURI* root) |
|
|
|
|
{ |
|
|
|
|
if (!root || !root->scheme.len || |
|
|
|
|
!chunk_equals(&root->scheme, &uri->scheme) || |
|
|
|
|
!chunk_equals(&root->authority, &uri->authority)) { |
|
|
|
|
return 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
bool differ = false; |
|
|
|
|
const size_t path_len = uri_path_len(uri); |
|
|
|
|
const size_t root_len = uri_path_len(root); |
|
|
|
|
size_t last_root_slash = 0; |
|
|
|
|
for (size_t i = 0; i < path_len && i < root_len; ++i) { |
|
|
|
|
const uint8_t u = uri_path_at(uri, i); |
|
|
|
|
const uint8_t r = uri_path_at(root, i); |
|
|
|
|
|
|
|
|
|
differ = differ || u != r; |
|
|
|
|
if (r == '/') { |
|
|
|
|
last_root_slash = i; |
|
|
|
|
if (differ) { |
|
|
|
|
return 0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return last_root_slash + 1; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** Return true iff `uri` shares path components with `root` */ |
|
|
|
|
static inline bool |
|
|
|
|
uri_is_related(const SerdURI* uri, const SerdURI* root) |
|
|
|
|
{ |
|
|
|
|
return uri_rooted_index(uri, root) > 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** Return true iff `uri` is within the base of `root` */ |
|
|
|
|
static inline bool |
|
|
|
|
uri_is_under(const SerdURI* uri, const SerdURI* root) |
|
|
|
|
{ |
|
|
|
|
const size_t index = uri_rooted_index(uri, root); |
|
|
|
|
return index > 0 && uri->path.len > index; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** Return true iff `c` is a letter, a digit, or one of ':', '+', '-', '.' */
static inline bool
is_uri_scheme_char(const int c)
{
    if (c == ':' || c == '+' || c == '-' || c == '.') {
        return true;
    }

    const bool alpha = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    const bool digit = (c >= '0' && c <= '9');
    return alpha || digit;
}
|
|
|
|
|
|
|
|
|
/* Error reporting */ |
|
|
|
|
|
|
|
|
|
static inline void |
|
|
|
@ -549,94 +48,4 @@ serd_error(SerdErrorSink error_sink, void* handle, const SerdError* e)
@@ -549,94 +48,4 @@ serd_error(SerdErrorSink error_sink, void* handle, const SerdError* e)
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
SERD_LOG_FUNC(3, 4) |
|
|
|
|
int |
|
|
|
|
r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...); |
|
|
|
|
|
|
|
|
|
/* Reader */ |
|
|
|
|
|
|
|
|
|
#ifdef SERD_STACK_CHECK |
|
|
|
|
# define SERD_STACK_ASSERT_TOP(reader, ref) \ |
|
|
|
|
assert(ref == reader->allocs[reader->n_allocs - 1]); |
|
|
|
|
#else |
|
|
|
|
# define SERD_STACK_ASSERT_TOP(reader, ref) |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
/* Reference to a node in the stack (we cannot use pointers since the
   stack may be reallocated, invalidating any pointers to elements).
*/
|
|
|
|
typedef size_t Ref; |
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
|
|
Ref graph; |
|
|
|
|
Ref subject; |
|
|
|
|
Ref predicate; |
|
|
|
|
Ref object; |
|
|
|
|
Ref datatype; |
|
|
|
|
Ref lang; |
|
|
|
|
SerdStatementFlags* flags; |
|
|
|
|
} ReadContext; |
|
|
|
|
|
|
|
|
|
struct SerdReaderImpl { |
|
|
|
|
void* handle; |
|
|
|
|
void (*free_handle)(void* ptr); |
|
|
|
|
SerdBaseSink base_sink; |
|
|
|
|
SerdPrefixSink prefix_sink; |
|
|
|
|
SerdStatementSink statement_sink; |
|
|
|
|
SerdEndSink end_sink; |
|
|
|
|
SerdErrorSink error_sink; |
|
|
|
|
void* error_handle; |
|
|
|
|
Ref rdf_first; |
|
|
|
|
Ref rdf_rest; |
|
|
|
|
Ref rdf_nil; |
|
|
|
|
SerdNode default_graph; |
|
|
|
|
SerdByteSource source; |
|
|
|
|
SerdStack stack; |
|
|
|
|
SerdSyntax syntax; |
|
|
|
|
unsigned next_id; |
|
|
|
|
SerdStatus status; |
|
|
|
|
uint8_t* buf; |
|
|
|
|
uint8_t* bprefix; |
|
|
|
|
size_t bprefix_len; |
|
|
|
|
bool strict; ///< True iff strict parsing
|
|
|
|
|
bool seen_genid; |
|
|
|
|
#ifdef SERD_STACK_CHECK |
|
|
|
|
Ref* allocs; ///< Stack of push offsets
|
|
|
|
|
size_t n_allocs; ///< Number of stack pushes
|
|
|
|
|
#endif |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
Ref push_node_padded(SerdReader* reader, |
|
|
|
|
size_t maxlen, |
|
|
|
|
SerdType type, |
|
|
|
|
const char* str, |
|
|
|
|
size_t n_bytes); |
|
|
|
|
|
|
|
|
|
Ref push_node(SerdReader* reader, |
|
|
|
|
SerdType type, |
|
|
|
|
const char* str, |
|
|
|
|
size_t n_bytes); |
|
|
|
|
|
|
|
|
|
size_t genid_size(SerdReader* reader); |
|
|
|
|
Ref blank_id(SerdReader* reader); |
|
|
|
|
void set_blank_id(SerdReader* reader, Ref ref, size_t buf_size); |
|
|
|
|
|
|
|
|
|
SerdNode* deref(SerdReader* reader, Ref ref); |
|
|
|
|
|
|
|
|
|
Ref pop_node(SerdReader* reader, Ref ref); |
|
|
|
|
|
|
|
|
|
bool emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l); |
|
|
|
|
|
|
|
|
|
bool read_n3_statement(SerdReader* reader); |
|
|
|
|
bool read_nquadsDoc(SerdReader* reader); |
|
|
|
|
bool read_turtleTrigDoc(SerdReader* reader); |
|
|
|
|
|
|
|
|
|
/** Identifies a statement field; FIELD_NONE is the zero value. */
typedef enum {
    FIELD_NONE,       ///< No field
    FIELD_SUBJECT,    ///< Subject
    FIELD_PREDICATE,  ///< Predicate
    FIELD_OBJECT,     ///< Object
    FIELD_GRAPH       ///< Graph
} Field;
|
|
|
|
|
|
|
|
|
#endif // SERD_INTERNAL_H
|
|
|
|
|