Browse Source

Format serd_bench.py with black

zrythm_meson
David Robillard 2 years ago
parent
commit
1b8dafe596
  1. 159
      scripts/serd_bench.py

159
scripts/serd_bench.py

@@ -12,6 +12,7 @@ import sys
class WorkingDirectory:
"Scoped context for changing working directory"
def __init__(self, working_dir):
self.original_dir = os.getcwd()
self.working_dir = working_dir
@@ -28,34 +29,34 @@ class WorkingDirectory:
def filename(n):
"Filename for a generated file with n statements"
return 'gen%d.ttl' % n
return "gen%d.ttl" % n
def gen(sp2b_dir, n_min, n_max, step):
"Generate files with n_min ... n_max statements if they are not present"
with WorkingDirectory(sp2b_dir) as dir:
for n in range(n_min, n_max + step, step):
out_path = os.path.join(dir.original_dir, 'build', filename(n))
out_path = os.path.join(dir.original_dir, "build", filename(n))
if not os.path.exists(out_path):
subprocess.call(['./sp2b_gen', '-t', str(n), out_path])
subprocess.call(["./sp2b_gen", "-t", str(n), out_path])
def write_header(results, progs):
"Write the header line for TSV output"
results.write('n')
results.write("n")
for prog in progs:
results.write('\t' + os.path.basename(prog.split()[0]))
results.write('\n')
results.write("\t" + os.path.basename(prog.split()[0]))
results.write("\n")
def parse_time(report):
"Return user time and max RSS from a /usr/bin/time -v report"
time = memory = None
for line in report.split('\n'):
if line.startswith('\tUser time'):
time = float(line[line.find(':') + 1:])
elif line.startswith('\tMaximum resident set'):
memory = float(line[line.find(':') + 1:]) * 1024
for line in report.split("\n"):
if line.startswith("\tUser time"):
time = float(line[line.find(":") + 1 :])
elif line.startswith("\tMaximum resident set"):
memory = float(line[line.find(":") + 1 :]) * 1024
return (time, memory)
@@ -65,9 +66,9 @@ def get_dashes():
dash = 2.0
space = dot = 0.75
yield [] # Solid
yield [] # Solid
yield [dash, space] # Dashed
yield [dot, space] # Dotted
yield [dot, space] # Dotted
# Dash-dots, with increasing number of dots for each line
for i in itertools.count(2):
@@ -77,14 +78,14 @@ def get_dashes():
def plot(in_file, out_filename, x_label, y_label, y_max=None):
"Plot a TSV file as SVG"
matplotlib.use('agg')
matplotlib.use("agg")
import matplotlib.pyplot as plt
fig_height = 4.0
dashes = get_dashes()
markers = itertools.cycle(['o', 's', 'v', 'D', '*', 'p', 'P', 'h', 'X'])
markers = itertools.cycle(["o", "s", "v", "D", "*", "p", "P", "h", "X"])
reader = csv.reader(in_file, delimiter='\t')
reader = csv.reader(in_file, delimiter="\t")
header = next(reader)
cols = [x for x in zip(*list(reader))]
@@ -98,32 +99,36 @@ def plot(in_file, out_filename, x_label, y_label, y_max=None):
if y_max is not None:
ax.set_ylim([0.0, y_max])
ax.grid(linewidth=0.25, linestyle=':', color='0', dashes=[0.2, 1.6])
ax.ticklabel_format(style='sci', scilimits=(4, 0), useMathText=True)
ax.tick_params(axis='both', width=0.75)
ax.grid(linewidth=0.25, linestyle=":", color="0", dashes=[0.2, 1.6])
ax.ticklabel_format(style="sci", scilimits=(4, 0), useMathText=True)
ax.tick_params(axis="both", width=0.75)
x = list(map(float, cols[0]))
for i, y in enumerate(cols[1::]):
ax.plot(x,
list(map(float, y)),
label=header[i + 1],
marker=next(markers),
dashes=next(dashes),
markersize=3.0,
linewidth=1.0)
ax.plot(
x,
list(map(float, y)),
label=header[i + 1],
marker=next(markers),
dashes=next(dashes),
markersize=3.0,
linewidth=1.0,
)
plt.legend()
plt.savefig(out_filename, bbox_inches='tight', pad_inches=0.025)
plt.savefig(out_filename, bbox_inches="tight", pad_inches=0.025)
plt.close()
sys.stderr.write('wrote {}\n'.format(out_filename))
sys.stderr.write("wrote {}\n".format(out_filename))
def run(progs, n_min, n_max, step):
"Benchmark each program with n_min ... n_max statements"
with WorkingDirectory('build'):
results = {'time': open('serdi-time.txt', 'w'),
'throughput': open('serdi-throughput.txt', 'w'),
'memory': open('serdi-memory.txt', 'w')}
with WorkingDirectory("build"):
results = {
"time": open("serdi-time.txt", "w"),
"throughput": open("serdi-throughput.txt", "w"),
"memory": open("serdi-memory.txt", "w"),
}
# Write TSV header for all output files
for name, f in results.items():
@@ -137,70 +142,94 @@ def run(progs, n_min, n_max, step):
# Run each program and fill rows with measurements
for prog in progs:
cmd = '/usr/bin/time -v ' + prog + ' ' + filename(n)
with open(filename(n) + '.out', 'w') as out:
sys.stderr.write(cmd + '\n')
cmd = "/usr/bin/time -v " + prog + " " + filename(n)
with open(filename(n) + ".out", "w") as out:
sys.stderr.write(cmd + "\n")
proc = subprocess.Popen(
cmd.split(), stdout=out, stderr=subprocess.PIPE)
cmd.split(), stdout=out, stderr=subprocess.PIPE
)
time, memory = parse_time(proc.communicate()[1].decode())
rows['time'] += ['%.07f' % time]
rows['throughput'] += ['%d' % (n / time)]
rows['memory'] += [str(memory)]
rows["time"] += ["%.07f" % time]
rows["throughput"] += ["%d" % (n / time)]
rows["memory"] += [str(memory)]
# Write rows to output files
for name, f in results.items():
f.write('\t'.join(rows[name]) + '\n')
f.write("\t".join(rows[name]) + "\n")
for name, f in results.items():
tsv_filename = 'serdi-%s.txt' % name
sys.stderr.write('wrote %s\n' % tsv_filename)
tsv_filename = "serdi-%s.txt" % name
sys.stderr.write("wrote %s\n" % tsv_filename)
def plot_results():
"Plot all benchmark results"
with WorkingDirectory('build'):
plot(open('serdi-time.txt', 'r'), 'serdi-time.svg',
'Statements', 'Time (s)')
plot(open('serdi-throughput.txt', 'r'), 'serdi-throughput.svg',
'Statements', 'Statements / s')
plot(open('serdi-memory.txt', 'r'), 'serdi-memory.svg',
'Statements', 'Bytes')
with WorkingDirectory("build"):
plot(
open("serdi-time.txt", "r"),
"serdi-time.svg",
"Statements",
"Time (s)",
)
plot(
open("serdi-throughput.txt", "r"),
"serdi-throughput.svg",
"Statements",
"Statements / s",
)
plot(
open("serdi-memory.txt", "r"),
"serdi-memory.svg",
"Statements",
"Bytes",
)
if __name__ == "__main__":
class OptParser(optparse.OptionParser):
def format_epilog(self, formatter):
return self.expand_prog_name(self.epilog)
opt = OptParser(
usage='%prog [OPTION]... SP2B_DIR',
description='Benchmark RDF reading and writing commands\n',
epilog='''
usage="%prog [OPTION]... SP2B_DIR",
description="Benchmark RDF reading and writing commands\n",
epilog="""
Example:
%prog --max 100000 \\
--run 'rapper -i turtle -o turtle' \\
--run 'riot --output=ttl' \\
--run 'rdfpipe -i turtle -o turtle' /path/to/sp2b/src/
''')
opt.add_option('--max', type='int', default=1000000,
help='maximum triple count')
opt.add_option('--run', type='string', action='append', default=[],
help='additional command to run (input file is appended)')
opt.add_option('--no-generate', action='store_true',
help='do not generate data')
opt.add_option('--no-execute', action='store_true',
help='do not run benchmarks')
opt.add_option('--no-plot', action='store_true',
help='do not plot benchmarks')
""",
)
opt.add_option(
"--max", type="int", default=1000000, help="maximum triple count"
)
opt.add_option(
"--run",
type="string",
action="append",
default=[],
help="additional command to run (input file is appended)",
)
opt.add_option(
"--no-generate", action="store_true", help="do not generate data"
)
opt.add_option(
"--no-execute", action="store_true", help="do not run benchmarks"
)
opt.add_option(
"--no-plot", action="store_true", help="do not plot benchmarks"
)
(options, args) = opt.parse_args()
if len(args) != 1:
opt.print_usage()
sys.exit(1)
progs = ['serdi -b -f -i turtle -o turtle'] + options.run
progs = ["serdi -b -f -i turtle -o turtle"] + options.run
min_n = int(options.max / 10)
max_n = options.max
step = min_n

Loading…
Cancel
Save