Browse Source

Pass negative eval tests (reject invalid escaped URI characters).

git-svn-id: http://svn.drobilla.net/serd/trunk@430 490d8e77-9747-427b-9fa3-0b8f29cee8a0
zrythm_meson
David Robillard 10 years ago
parent
commit
c7fb2c971e
  1. 33
      src/reader.c
  2. 2
      tests/good/test-escapes.nt
  3. 2
      tests/good/test-escapes.ttl
  4. 2
      tests/good/test-uri-escape.nt
  5. 2
      tests/good/test-uri-escape.ttl
  6. 8
      wscript

33
src/reader.c

@@ -280,7 +280,7 @@ read_HEX(SerdReader* reader)
}
// Read UCHAR escape, initial \ is already eaten by caller
static inline bool
static inline uint32_t
read_UCHAR(SerdReader* reader, Ref dest)
{
const uint8_t b = peek_byte(reader);
@@ -293,38 +293,39 @@ read_UCHAR(SerdReader* reader, Ref dest)
length = 4;
break;
default:
return false;
return 0;
}
eat_byte_safe(reader, b);
uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (unsigned i = 0; i < length; ++i) {
if (!(buf[i] = read_HEX(reader))) {
return false;
return 0;
}
}
uint32_t c;
sscanf((const char*)buf, "%X", &c);
uint32_t code;
sscanf((const char*)buf, "%X", &code);
unsigned size = 0;
if (c < 0x00000080) {
if (code < 0x00000080) {
size = 1;
} else if (c < 0x00000800) {
} else if (code < 0x00000800) {
size = 2;
} else if (c < 0x00010000) {
} else if (code < 0x00010000) {
size = 3;
} else if (c < 0x00110000) {
} else if (code < 0x00110000) {
size = 4;
} else {
r_err(reader, SERD_ERR_BAD_SYNTAX,
"unicode character 0x%X out of range\n", c);
"unicode character 0x%X out of range\n", code);
push_replacement(reader, dest);
return true;
return 0xFFFD;
}
// Build output in buf
// (Note # of bytes = # of leading 1 bits in first byte)
uint32_t c = code;
switch (size) {
case 4:
buf[3] = 0x80 | (uint8_t)(c & 0x3F);
@@ -345,7 +346,7 @@ read_UCHAR(SerdReader* reader, Ref dest)
for (unsigned i = 0; i < size; ++i) {
push_byte(reader, dest, buf[i]);
}
return true;
return code;
}
// Read ECHAR escape, initial \ is already eaten by caller
@@ -741,7 +742,8 @@ static Ref
read_IRIREF(SerdReader* reader)
{
TRY_RET(eat_byte_check(reader, '<'));
Ref ref = push_node(reader, SERD_URI, "", 0);
Ref ref = push_node(reader, SERD_URI, "", 0);
uint32_t code;
while (true) {
const uint8_t c = peek_byte(reader);
switch (c) {
@@ -754,9 +756,10 @@ read_IRIREF(SerdReader* reader)
return ref;
case '\\':
eat_byte_safe(reader, c);
if (!read_UCHAR(reader, ref)) {
switch (code = read_UCHAR(reader, ref)) {
case 0: case ' ': case '<': case '>':
r_err(reader, SERD_ERR_BAD_SYNTAX,
"invalid IRI character `%c'\n", c);
"invalid escaped IRI character %X %c\n", code, code);
return pop_node(reader, ref);
}
break;

2
tests/good/test-escapes.nt

@@ -1,2 +1,2 @@
<http://example.org/thing> <http://example.org/comment> "\\\r\n\t" .
<http://example.org/thing> <http://example.org/comment> <http://example.org/\u003E> .
<http://example.org/thing> <http://example.org/comment> <http://example.org/\u005C> .

2
tests/good/test-escapes.ttl

@@ -1,2 +1,2 @@
<http://example.org/thing> <http://example.org/comment> "\\\r\n\t" .
<http://example.org/thing> <http://example.org/comment> <http://example.org/\u003E> .
<http://example.org/thing> <http://example.org/comment> <http://example.org/\u005C> .

2
tests/good/test-uri-escape.nt

@@ -1 +1 @@
<http://example.org/node> <http://example.org/prop> <scheme:\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F\u0020!\u0022#$%&'()*+,-./0123456789:/\u003C=\u003E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005C]\u005E_\u0060abcdefghijklmnopqrstuvwxyz\u007B\u007C\u007D~\u007F> .
<http://example.org/node> <http://example.org/prop> <scheme:\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F%20!\u0022#$%&'()*+,-./0123456789:/%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005C]\u005E_\u0060abcdefghijklmnopqrstuvwxyz\u007B\u007C\u007D~\u007F> .

2
tests/good/test-uri-escape.ttl

@@ -1 +1 @@
<http://example.org/node> <http://example.org/prop> <scheme:\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F\u0020!\u0022#$%&'()*+,-./0123456789:/\u003C=\u003E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005C]\u005E_\u0060abcdefghijklmnopqrstuvwxyz\u007B\u007C\u007D~\u007F> .
<http://example.org/node> <http://example.org/prop> <scheme:\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F%20!\u0022#$%&'()*+,-./0123456789:/%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005C]\u005E_\u0060abcdefghijklmnopqrstuvwxyz\u007B\u007C\u007D~\u007F> .

8
wscript

@@ -348,6 +348,14 @@ def test_manifest(ctx, srcdir, testdir, report, test_base, parse_base):
passed = run_test(action_node, 1)
report.write(earl_assertion(test, passed, asserter))
for i in sorted(model.triples([None, rdf.type, rdft.TestTurtleNegativeEval])):
test = i[0]
name = model.value(test, mf.name, None)
action_node = model.value(test, mf.action, None)[len(test_base):]
passed = run_test(action_node, 1)
report.write(earl_assertion(test, passed, asserter))
for i in sorted(model.triples([None, rdf.type, rdft.TestTurtleEval])):
test = i[0]
name = model.value(test, mf.name, None)

Loading…
Cancel
Save