Serd subproject with meson
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

375 lines
11 KiB

/*
Copyright 2011-2020 David Robillard <http://drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#define _POSIX_C_SOURCE 200809L /* for fileno and posix_fadvise */
#include "serd_config.h"
#include "string_utils.h"
#include "serd/serd.h"
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN 1
# include <fcntl.h>
# include <io.h>
#endif
#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO)
# include <fcntl.h>
#endif
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Print a fixed message to stderr with a "serdi: " prefix.  `msg` must be a
   string literal, since it is joined to the prefix by literal concatenation. */
#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg)
/* Like SERDI_ERROR, but `fmt` is a printf-style format literal and must be
   followed by at least one argument. */
#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__)
/* One row of the syntax table: a syntax together with the name accepted on
   the command line and the file extension used to guess it. */
typedef struct {
/* Syntax identifier handed to the reader and writer */
SerdSyntax syntax;
/* Name matched by get_syntax(); NULL in the terminating row */
const char* name;
/* File extension, including the leading '.', matched by guess_syntax() */
const char* extension;
} Syntax;
/* Table of known syntaxes, searched in order by get_syntax() and
   guess_syntax().  The final row has a NULL name and ends the search. */
static const Syntax syntaxes[] = {{SERD_TURTLE, "turtle", ".ttl"},
{SERD_NTRIPLES, "ntriples", ".nt"},
{SERD_NQUADS, "nquads", ".nq"},
{SERD_TRIG, "trig", ".trig"},
{(SerdSyntax)0, NULL, NULL}};
/*
  Look up a syntax by the name given on the command line.

  Only the first strlen(name) characters are compared, so (assuming
  serd_strncasecmp behaves like strncasecmp) a case-insensitive abbreviation
  such as "tur" selects the first table row it is a prefix of.  An empty name
  is rejected explicitly: a zero-length comparison would otherwise compare
  equal to the first row and silently select it.

  Returns the matching syntax, or 0 after printing an error if none matches.
*/
static SerdSyntax
get_syntax(const char* name)
{
  const size_t name_len = strlen(name);

  if (name_len > 0) {
    for (const Syntax* s = syntaxes; s->name; ++s) {
      if (!serd_strncasecmp(s->name, name, name_len)) {
        return s->syntax;
      }
    }
  }

  SERDI_ERRORF("unknown syntax `%s'\n", name);
  return (SerdSyntax)0;
}
/*
  Guess a syntax from the extension of `filename`.

  The extension is everything from the last '.' in `filename` onwards.  It
  must have the same length as a table extension and compare equal to it
  with serd_strncasecmp (presumably case-insensitive, as the name suggests).
  Requiring equal lengths keeps a truncated extension such as "." or ".t"
  from being taken as a match for ".ttl".

  Returns the guessed syntax, or 0 if there is no extension or it is unknown.
*/
static SERD_PURE_FUNC SerdSyntax
guess_syntax(const char* filename)
{
  const char* const ext = strrchr(filename, '.');
  if (!ext) {
    return (SerdSyntax)0;
  }

  const size_t ext_len = strlen(ext);
  for (const Syntax* s = syntaxes; s->name; ++s) {
    if (strlen(s->extension) == ext_len &&
        !serd_strncasecmp(s->extension, ext, ext_len)) {
      return s->syntax;
    }
  }

  return (SerdSyntax)0;
}
/*
  Print the program version, copyright, and licence notice to stdout.

  Always returns 0, so the result can be used directly as the exit status.
*/
static int
print_version(void)
{
  static const char* const legal_notice =
    "Copyright 2011-2020 David Robillard <http://drobilla.net>.\n"
    "License: <http://www.opensource.org/licenses/isc>\n"
    "This is free software; you are free to change and redistribute it.\n"
    "There is NO WARRANTY, to the extent permitted by law.\n";

  printf("serdi " SERD_VERSION " <http://drobilla.net/software/serd>\n");
  fputs(legal_notice, stdout);

  return 0;
}
/*
  Print the usage summary and option list.

  When `error` is true the text goes to stderr, preceded by a blank line, and
  1 is returned; otherwise it goes to stdout and 0 is returned.  `name` is the
  program name shown in the "Usage:" line.
*/
static int
print_usage(const char* name, bool error)
{
  static const char* const option_help =
    " -a Write ASCII output if possible.\n"
    " -b Fast bulk output for large serialisations.\n"
    " -c PREFIX Chop PREFIX from matching blank node IDs.\n"
    " -e Eat input one character at a time.\n"
    " -f Keep full URIs in input (don't qualify).\n"
    " -h Display this help and exit.\n"
    " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"
    " -l Lax (non-strict) parsing.\n"
    " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"
    " -p PREFIX Add PREFIX to blank node IDs.\n"
    " -q Suppress all output except data.\n"
    " -r ROOT_URI Keep relative URIs within ROOT_URI.\n"
    " -s INPUT Parse INPUT as string (terminates options).\n"
    " -v Display version information and exit.\n";

  FILE* out = stdout;
  int status = 0;
  if (error) {
    out = stderr;
    status = 1;
    fputs("\n", out); // Separate the usage text from the preceding error
  }

  fprintf(out, "Usage: %s [OPTION]... INPUT [BASE_URI]\n", name);
  fputs("Read and write RDF syntax.\n", out);
  fputs("Use - for INPUT to read from standard input.\n\n", out);
  fputs(option_help, out);

  return status;
}
/*
  Report that option `opt` was given without its required argument.

  Prints the error and then the usage text to stderr, and returns the value
  from print_usage(name, true).
*/
static int
missing_arg(const char* name, char opt)
{
  SERDI_ERRORF("option requires an argument -- '%c'\n", opt);

  const int status = print_usage(name, true);
  return status;
}
/*
  Error sink that discards every error.

  Installed on the reader and writer when -q is given.  Both arguments are
  ignored and SERD_SUCCESS is always returned.
*/
static SerdStatus
quiet_error_sink(void* handle, const SerdError* e)
{
  // Nothing is reported; the casts only silence unused-parameter warnings
  (void)e;
  (void)handle;

  return SERD_SUCCESS;
}
/*
  Open `path` with fopen() in the given `mode`, reporting failure on stderr.

  Where posix_fadvise() and fileno() are available, the kernel is also told
  that the file will be read sequentially and not reused.  The advice values
  are an enumeration rather than bit flags, so each is passed in its own
  call; OR-ing them together gives a value that is not a valid advice.  The
  advice is only a hint, so its result is deliberately ignored.

  Returns the open stream, or NULL if the file could not be opened.
*/
static inline FILE*
serd_fopen(const char* path, const char* mode)
{
  FILE* fd = fopen(path, mode);
  if (!fd) {
    SERDI_ERRORF("failed to open file %s (%s)\n", path, strerror(errno));
    return NULL;
  }

#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO)
  (void)posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL);
  (void)posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_NOREUSE);
#endif

  return fd;
}
static SerdStyle
choose_style(const SerdSyntax input_syntax,
const SerdSyntax output_syntax,
const bool ascii,
const bool bulk_write,
const bool full_uris)
{
unsigned output_style = 0u;
if (output_syntax == SERD_NTRIPLES || ascii) {
output_style |= SERD_STYLE_ASCII;
} else if (output_syntax == SERD_TURTLE) {
output_style |= SERD_STYLE_ABBREVIATED;
if (!full_uris) {
output_style |= SERD_STYLE_CURIED;
}
}
if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) ||
(output_style & SERD_STYLE_CURIED)) {
// Base URI may change and/or we're abbreviating URIs, so must resolve
output_style |= SERD_STYLE_RESOLVED;
}
if (bulk_write) {
output_style |= SERD_STYLE_BULK;
}
return (SerdStyle)output_style;
}
/*
Entry point of the serdi command-line tool.
Parses options from argv, opens the input (a file, standard input, or a
string given on the command line), and reads it with a SerdReader whose
sinks are the corresponding functions of a SerdWriter writing to stdout.
Returns 0 on success (including after -h and -v), and 1 for a usage error,
a missing or unopenable input, or a final status greater than SERD_FAILURE.
*/
int
main(int argc, char** argv)
{
// No arguments at all: show the usage text as an error
if (argc < 2) {
return print_usage(argv[0], true);
}
// State filled in by the option loop below
FILE* in_fd = NULL;
SerdSyntax input_syntax = (SerdSyntax)0;
SerdSyntax output_syntax = (SerdSyntax)0;
bool from_file = true;
bool ascii = false;
bool bulk_read = true;
bool bulk_write = false;
bool full_uris = false;
bool lax = false;
bool quiet = false;
const uint8_t* in_name = NULL;
const uint8_t* add_prefix = NULL;
const uint8_t* chop_prefix = NULL;
const uint8_t* root_uri = NULL;
int a = 1;
// Consume leading arguments that start with '-'.  Only argv[a][1] selects
// the option, so any characters after the option letter are not examined.
for (; a < argc && argv[a][0] == '-'; ++a) {
// A lone "-" means standard input; it stays at argv[a] as the input argument
if (argv[a][1] == '\0') {
in_name = (const uint8_t*)"(stdin)";
in_fd = stdin;
break;
}
if (argv[a][1] == 'a') {
ascii = true;
} else if (argv[a][1] == 'b') {
bulk_write = true;
} else if (argv[a][1] == 'e') {
bulk_read = false;
} else if (argv[a][1] == 'f') {
full_uris = true;
} else if (argv[a][1] == 'h') {
return print_usage(argv[0], false);
} else if (argv[a][1] == 'l') {
lax = true;
} else if (argv[a][1] == 'q') {
quiet = true;
} else if (argv[a][1] == 'v') {
return print_version();
} else if (argv[a][1] == 's') {
// The next argument is the input text itself; stop parsing options
in_name = (const uint8_t*)"(string)";
from_file = false;
++a;
break;
} else if (argv[a][1] == 'i') {
// Options that take a value consume the following argument
if (++a == argc) {
return missing_arg(argv[0], 'i');
}
if (!(input_syntax = get_syntax(argv[a]))) {
return print_usage(argv[0], true);
}
} else if (argv[a][1] == 'o') {
if (++a == argc) {
return missing_arg(argv[0], 'o');
}
if (!(output_syntax = get_syntax(argv[a]))) {
return print_usage(argv[0], true);
}
} else if (argv[a][1] == 'p') {
if (++a == argc) {
return missing_arg(argv[0], 'p');
}
add_prefix = (const uint8_t*)argv[a];
} else if (argv[a][1] == 'c') {
if (++a == argc) {
return missing_arg(argv[0], 'c');
}
chop_prefix = (const uint8_t*)argv[a];
} else if (argv[a][1] == 'r') {
if (++a == argc) {
return missing_arg(argv[0], 'r');
}
root_uri = (const uint8_t*)argv[a];
} else {
SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1);
return print_usage(argv[0], true);
}
}
// Every argument was consumed as an option, so there is nothing to read
if (a == argc) {
SERDI_ERROR("missing input\n");
return 1;
}
#ifdef _WIN32
// Put stdin and stdout into binary mode on Windows
_setmode(_fileno(stdin), _O_BINARY);
_setmode(_fileno(stdout), _O_BINARY);
#endif
// argv[a] is the input: a path or file: URI, "-", or the text given to -s
uint8_t* input_path = NULL;
const uint8_t* input = (const uint8_t*)argv[a++];
if (from_file) {
in_name = in_name ? in_name : input;
if (!in_fd) {
// For a file: URI, open the result of serd_file_uri_parse() instead
// (presumably the local path); it is released with free() at the end
if (!strncmp((const char*)input, "file:", 5)) {
input_path = serd_file_uri_parse(input, NULL);
input = input_path;
}
if (!input || !(in_fd = serd_fopen((const char*)input, "rb"))) {
return 1;
}
}
}
// Without -i, guess from the input name's extension, falling back to TriG
if (!input_syntax && !(input_syntax = guess_syntax((const char*)in_name))) {
input_syntax = SERD_TRIG;
}
// Without -o, Turtle and NTriples input is written as NTriples, and
// anything else as NQuads
if (!output_syntax) {
output_syntax =
((input_syntax == SERD_TURTLE || input_syntax == SERD_NTRIPLES)
? SERD_NTRIPLES
: SERD_NQUADS);
}
const SerdStyle output_style =
choose_style(input_syntax, output_syntax, ascii, bulk_write, full_uris);
SerdURI base_uri = SERD_URI_NULL;
SerdNode base = SERD_NODE_NULL;
if (a < argc) { // Base URI given on command line
base =
serd_node_new_uri_from_string((const uint8_t*)argv[a], NULL, &base_uri);
} else if (from_file && in_fd != stdin) { // Use input file URI
base = serd_node_new_file_uri(input, NULL, &base_uri, true);
}
// The writer outputs to stdout; the reader passes everything it reads to
// the writer through the writer's own functions, used as sinks
FILE* const out_fd = stdout;
SerdEnv* const env = serd_env_new(&base);
SerdWriter* const writer = serd_writer_new(
output_syntax, output_style, env, &base_uri, serd_file_sink, out_fd);
SerdReader* const reader =
serd_reader_new(input_syntax,
writer,
NULL,
(SerdBaseSink)serd_writer_set_base_uri,
(SerdPrefixSink)serd_writer_set_prefix,
(SerdStatementSink)serd_writer_write_statement,
(SerdEndSink)serd_writer_end_anon);
serd_reader_set_strict(reader, !lax);
// -q: replace the error sinks of both reader and writer with one that
// discards errors
if (quiet) {
serd_reader_set_error_sink(reader, quiet_error_sink, NULL);
serd_writer_set_error_sink(writer, quiet_error_sink, NULL);
}
// root_uri, chop_prefix and add_prefix are NULL unless -r, -c and -p were given
SerdNode root = serd_node_from_string(SERD_URI, root_uri);
serd_writer_set_root_uri(writer, &root);
serd_writer_chop_blank_prefix(writer, chop_prefix);
serd_reader_add_blank_prefix(reader, add_prefix);
// Read the input: as a string (-s), from the file handle in one call
// (default), or chunk by chunk until a non-zero status (-e)
SerdStatus st = SERD_SUCCESS;
if (!from_file) {
st = serd_reader_read_string(reader, input);
} else if (bulk_read) {
st = serd_reader_read_file_handle(reader, in_fd, in_name);
} else {
st = serd_reader_start_stream(reader, in_fd, in_name, false);
while (!st) {
st = serd_reader_read_chunk(reader);
}
serd_reader_end_stream(reader);
}
// Tear down in reverse order of construction
serd_reader_free(reader);
serd_writer_finish(writer);
serd_writer_free(writer);
serd_env_free(env);
serd_node_free(&base);
free(input_path);
// in_fd is set (to stdin or an opened file) whenever from_file is true
if (from_file) {
fclose(in_fd);
}
// Closing stdout flushes it, so a failed write is detected here
if (fclose(out_fd)) {
perror("serdi: write error");
st = SERD_ERR_UNKNOWN;
}
// A status of exactly SERD_FAILURE still gives a zero exit status
return (st > SERD_FAILURE) ? 1 : 0;
}