|
|
|
@ -12,6 +12,7 @@ import sys
@@ -12,6 +12,7 @@ import sys
|
|
|
|
|
|
|
|
|
|
class WorkingDirectory: |
|
|
|
|
"Scoped context for changing working directory" |
|
|
|
|
|
|
|
|
|
def __init__(self, working_dir): |
|
|
|
|
self.original_dir = os.getcwd() |
|
|
|
|
self.working_dir = working_dir |
|
|
|
@ -28,34 +29,34 @@ class WorkingDirectory:
@@ -28,34 +29,34 @@ class WorkingDirectory:
|
|
|
|
|
|
|
|
|
|
def filename(n):
    """Return the filename for a generated file with n statements.

    The name has the form ``gen<n>.ttl``; for example ``filename(1000)``
    is ``"gen1000.ttl"``.  ``n`` is rendered with ``%d``, so it must be a
    number.
    """
    return "gen%d.ttl" % n
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def gen(sp2b_dir, n_min, n_max, step):
    """Generate files with n_min ... n_max statements if they are not present.

    For every ``n`` in ``range(n_min, n_max + step, step)`` the generator
    ``./sp2b_gen -t n OUT`` is invoked, where ``OUT`` is ``filename(n)``
    inside the ``build`` directory of the directory that was current when
    the ``WorkingDirectory`` context was created.  Files that already exist
    are left alone, so repeated runs only produce what is missing.

    Note that the range end is ``n_max + step``: ``n_max`` itself is
    included when it lies on the step grid, and the last ``n`` may exceed
    ``n_max`` when it does not.
    """
    # NOTE(review): relies on WorkingDirectory.__enter__ returning the
    # context object (it must expose ``original_dir``) and on the context
    # making sp2b_dir current so that ``./sp2b_gen`` resolves -- confirm
    # against the class definition.
    with WorkingDirectory(sp2b_dir) as cwd:
        for n in range(n_min, n_max + step, step):
            out_path = os.path.join(cwd.original_dir, "build", filename(n))
            if not os.path.exists(out_path):
                # The generator's exit status is deliberately not checked,
                # matching the original best-effort behaviour.
                subprocess.call(["./sp2b_gen", "-t", str(n), out_path])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def write_header(results, progs):
    """Write the header line for TSV output.

    The first column is labelled ``n``; each following column is labelled
    with the basename of the first word of the corresponding command in
    ``progs`` (e.g. ``"/usr/bin/rapper -i turtle"`` becomes ``rapper``).

    ``results`` is any object with a ``write(str)`` method.  An empty
    command string in ``progs`` raises ``IndexError``.
    """
    columns = ["n"]
    columns.extend(os.path.basename(prog.split()[0]) for prog in progs)
    results.write("\t".join(columns) + "\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_time(report):
    """Return user time and max RSS from a /usr/bin/time -v report.

    ``report`` is the text of the report.  Lines beginning with a tab
    followed by ``User time`` or ``Maximum resident set`` are parsed; the
    number after the first colon on such a line is used.

    Returns a ``(time, memory)`` tuple of floats.  The memory figure is
    multiplied by 1024 (GNU time prints the resident set size in kbytes,
    and the memory results are plotted with a ``Bytes`` axis).  Either
    element is ``None`` if its line is absent.  Raises ``ValueError`` if
    the text after the colon is not a number.
    """
    user_time = max_rss = None
    # splitlines() also copes with CRLF line endings.
    for line in report.splitlines():
        if line.startswith("\tUser time"):
            user_time = float(line.partition(":")[2])
        elif line.startswith("\tMaximum resident set"):
            max_rss = float(line.partition(":")[2]) * 1024

    return (user_time, max_rss)
|
|
|
|
|
|
|
|
@ -65,9 +66,9 @@ def get_dashes():
@@ -65,9 +66,9 @@ def get_dashes():
|
|
|
|
|
dash = 2.0 |
|
|
|
|
space = dot = 0.75 |
|
|
|
|
|
|
|
|
|
yield [] # Solid |
|
|
|
|
yield [] # Solid |
|
|
|
|
yield [dash, space] # Dashed |
|
|
|
|
yield [dot, space] # Dotted |
|
|
|
|
yield [dot, space] # Dotted |
|
|
|
|
|
|
|
|
|
# Dash-dots, with increasing number of dots for each line |
|
|
|
|
for i in itertools.count(2): |
|
|
|
@ -77,14 +78,14 @@ def get_dashes():
@@ -77,14 +78,14 @@ def get_dashes():
|
|
|
|
|
def plot(in_file, out_filename, x_label, y_label, y_max=None): |
|
|
|
|
"Plot a TSV file as SVG" |
|
|
|
|
|
|
|
|
|
matplotlib.use('agg') |
|
|
|
|
matplotlib.use("agg") |
|
|
|
|
import matplotlib.pyplot as plt |
|
|
|
|
|
|
|
|
|
fig_height = 4.0 |
|
|
|
|
dashes = get_dashes() |
|
|
|
|
markers = itertools.cycle(['o', 's', 'v', 'D', '*', 'p', 'P', 'h', 'X']) |
|
|
|
|
markers = itertools.cycle(["o", "s", "v", "D", "*", "p", "P", "h", "X"]) |
|
|
|
|
|
|
|
|
|
reader = csv.reader(in_file, delimiter='\t') |
|
|
|
|
reader = csv.reader(in_file, delimiter="\t") |
|
|
|
|
header = next(reader) |
|
|
|
|
cols = [x for x in zip(*list(reader))] |
|
|
|
|
|
|
|
|
@ -98,32 +99,36 @@ def plot(in_file, out_filename, x_label, y_label, y_max=None):
@@ -98,32 +99,36 @@ def plot(in_file, out_filename, x_label, y_label, y_max=None):
|
|
|
|
|
if y_max is not None: |
|
|
|
|
ax.set_ylim([0.0, y_max]) |
|
|
|
|
|
|
|
|
|
ax.grid(linewidth=0.25, linestyle=':', color='0', dashes=[0.2, 1.6]) |
|
|
|
|
ax.ticklabel_format(style='sci', scilimits=(4, 0), useMathText=True) |
|
|
|
|
ax.tick_params(axis='both', width=0.75) |
|
|
|
|
ax.grid(linewidth=0.25, linestyle=":", color="0", dashes=[0.2, 1.6]) |
|
|
|
|
ax.ticklabel_format(style="sci", scilimits=(4, 0), useMathText=True) |
|
|
|
|
ax.tick_params(axis="both", width=0.75) |
|
|
|
|
|
|
|
|
|
x = list(map(float, cols[0])) |
|
|
|
|
for i, y in enumerate(cols[1::]): |
|
|
|
|
ax.plot(x, |
|
|
|
|
list(map(float, y)), |
|
|
|
|
label=header[i + 1], |
|
|
|
|
marker=next(markers), |
|
|
|
|
dashes=next(dashes), |
|
|
|
|
markersize=3.0, |
|
|
|
|
linewidth=1.0) |
|
|
|
|
ax.plot( |
|
|
|
|
x, |
|
|
|
|
list(map(float, y)), |
|
|
|
|
label=header[i + 1], |
|
|
|
|
marker=next(markers), |
|
|
|
|
dashes=next(dashes), |
|
|
|
|
markersize=3.0, |
|
|
|
|
linewidth=1.0, |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
plt.legend() |
|
|
|
|
plt.savefig(out_filename, bbox_inches='tight', pad_inches=0.025) |
|
|
|
|
plt.savefig(out_filename, bbox_inches="tight", pad_inches=0.025) |
|
|
|
|
plt.close() |
|
|
|
|
sys.stderr.write('wrote {}\n'.format(out_filename)) |
|
|
|
|
sys.stderr.write("wrote {}\n".format(out_filename)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run(progs, n_min, n_max, step): |
|
|
|
|
"Benchmark each program with n_min ... n_max statements" |
|
|
|
|
with WorkingDirectory('build'): |
|
|
|
|
results = {'time': open('serdi-time.txt', 'w'), |
|
|
|
|
'throughput': open('serdi-throughput.txt', 'w'), |
|
|
|
|
'memory': open('serdi-memory.txt', 'w')} |
|
|
|
|
with WorkingDirectory("build"): |
|
|
|
|
results = { |
|
|
|
|
"time": open("serdi-time.txt", "w"), |
|
|
|
|
"throughput": open("serdi-throughput.txt", "w"), |
|
|
|
|
"memory": open("serdi-memory.txt", "w"), |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# Write TSV header for all output files |
|
|
|
|
for name, f in results.items(): |
|
|
|
@ -137,70 +142,94 @@ def run(progs, n_min, n_max, step):
@@ -137,70 +142,94 @@ def run(progs, n_min, n_max, step):
|
|
|
|
|
|
|
|
|
|
# Run each program and fill rows with measurements |
|
|
|
|
for prog in progs: |
|
|
|
|
cmd = '/usr/bin/time -v ' + prog + ' ' + filename(n) |
|
|
|
|
with open(filename(n) + '.out', 'w') as out: |
|
|
|
|
sys.stderr.write(cmd + '\n') |
|
|
|
|
cmd = "/usr/bin/time -v " + prog + " " + filename(n) |
|
|
|
|
with open(filename(n) + ".out", "w") as out: |
|
|
|
|
sys.stderr.write(cmd + "\n") |
|
|
|
|
proc = subprocess.Popen( |
|
|
|
|
cmd.split(), stdout=out, stderr=subprocess.PIPE) |
|
|
|
|
cmd.split(), stdout=out, stderr=subprocess.PIPE |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
time, memory = parse_time(proc.communicate()[1].decode()) |
|
|
|
|
rows['time'] += ['%.07f' % time] |
|
|
|
|
rows['throughput'] += ['%d' % (n / time)] |
|
|
|
|
rows['memory'] += [str(memory)] |
|
|
|
|
rows["time"] += ["%.07f" % time] |
|
|
|
|
rows["throughput"] += ["%d" % (n / time)] |
|
|
|
|
rows["memory"] += [str(memory)] |
|
|
|
|
|
|
|
|
|
# Write rows to output files |
|
|
|
|
for name, f in results.items(): |
|
|
|
|
f.write('\t'.join(rows[name]) + '\n') |
|
|
|
|
f.write("\t".join(rows[name]) + "\n") |
|
|
|
|
|
|
|
|
|
for name, f in results.items(): |
|
|
|
|
tsv_filename = 'serdi-%s.txt' % name |
|
|
|
|
sys.stderr.write('wrote %s\n' % tsv_filename) |
|
|
|
|
tsv_filename = "serdi-%s.txt" % name |
|
|
|
|
sys.stderr.write("wrote %s\n" % tsv_filename) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def plot_results():
    """Plot all benchmark results.

    Inside the ``build`` working-directory context, each TSV results file
    (``serdi-time.txt``, ``serdi-throughput.txt``, ``serdi-memory.txt``) is
    passed to ``plot`` to produce the matching ``.svg`` file, with
    ``Statements`` as the x-axis label.
    """
    # (input TSV, output SVG, y-axis label)
    charts = (
        ("serdi-time.txt", "serdi-time.svg", "Time (s)"),
        ("serdi-throughput.txt", "serdi-throughput.svg", "Statements / s"),
        ("serdi-memory.txt", "serdi-memory.svg", "Bytes"),
    )

    with WorkingDirectory("build"):
        for tsv_name, svg_name, y_label in charts:
            # Close each input once its plot is written instead of leaking
            # the handle until interpreter exit.
            with open(tsv_name, "r") as in_file:
                plot(in_file, svg_name, "Statements", y_label)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
|
|
|
|
|
|
class OptParser(optparse.OptionParser):
    """Option parser whose epilog is emitted verbatim.

    Overriding ``format_epilog`` bypasses the help formatter, so a
    multi-line example in the epilog keeps its line breaks and
    indentation.
    """

    def format_epilog(self, formatter):
        """Return the epilog with ``%prog`` expanded and layout untouched.

        ``formatter`` is accepted for interface compatibility but is not
        used.  An unset epilog yields an empty string rather than failing
        on ``None``.
        """
        if self.epilog is None:
            return ""
        return self.expand_prog_name(self.epilog)
|
|
|
|
|
|
|
|
|
opt = OptParser( |
|
|
|
|
usage='%prog [OPTION]... SP2B_DIR', |
|
|
|
|
description='Benchmark RDF reading and writing commands\n', |
|
|
|
|
epilog=''' |
|
|
|
|
usage="%prog [OPTION]... SP2B_DIR", |
|
|
|
|
description="Benchmark RDF reading and writing commands\n", |
|
|
|
|
epilog=""" |
|
|
|
|
Example: |
|
|
|
|
%prog --max 100000 \\ |
|
|
|
|
--run 'rapper -i turtle -o turtle' \\ |
|
|
|
|
--run 'riot --output=ttl' \\ |
|
|
|
|
--run 'rdfpipe -i turtle -o turtle' /path/to/sp2b/src/ |
|
|
|
|
''') |
|
|
|
|
|
|
|
|
|
opt.add_option('--max', type='int', default=1000000, |
|
|
|
|
help='maximum triple count') |
|
|
|
|
opt.add_option('--run', type='string', action='append', default=[], |
|
|
|
|
help='additional command to run (input file is appended)') |
|
|
|
|
opt.add_option('--no-generate', action='store_true', |
|
|
|
|
help='do not generate data') |
|
|
|
|
opt.add_option('--no-execute', action='store_true', |
|
|
|
|
help='do not run benchmarks') |
|
|
|
|
opt.add_option('--no-plot', action='store_true', |
|
|
|
|
help='do not plot benchmarks') |
|
|
|
|
""", |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
opt.add_option( |
|
|
|
|
"--max", type="int", default=1000000, help="maximum triple count" |
|
|
|
|
) |
|
|
|
|
opt.add_option( |
|
|
|
|
"--run", |
|
|
|
|
type="string", |
|
|
|
|
action="append", |
|
|
|
|
default=[], |
|
|
|
|
help="additional command to run (input file is appended)", |
|
|
|
|
) |
|
|
|
|
opt.add_option( |
|
|
|
|
"--no-generate", action="store_true", help="do not generate data" |
|
|
|
|
) |
|
|
|
|
opt.add_option( |
|
|
|
|
"--no-execute", action="store_true", help="do not run benchmarks" |
|
|
|
|
) |
|
|
|
|
opt.add_option( |
|
|
|
|
"--no-plot", action="store_true", help="do not plot benchmarks" |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
(options, args) = opt.parse_args() |
|
|
|
|
if len(args) != 1: |
|
|
|
|
opt.print_usage() |
|
|
|
|
sys.exit(1) |
|
|
|
|
|
|
|
|
|
progs = ['serdi -b -f -i turtle -o turtle'] + options.run |
|
|
|
|
progs = ["serdi -b -f -i turtle -o turtle"] + options.run |
|
|
|
|
min_n = int(options.max / 10) |
|
|
|
|
max_n = options.max |
|
|
|
|
step = min_n |
|
|
|
|