# # # add_file "benchmarks.py" # content [c4e1e7c4503221fd461f5796f54d99a309846df5] # # add_file "driver.py" # content [166389c362ec9c22019dc818580d9aaeed8b691c] # # add_file "instrumenter.py" # content [db37331dcb3b7aedee7e35b00e7ad285e394b9be] # # add_file "instrumenters.py" # content [ed7cb561de98eaffea72e7e92c5abede0c73ab71] # # add_file "mtn.py" # content [fa50a6d6a4fa3c4cf880dc15b3aad40d212cccf4] # # add_file "repo.py" # content [6f250d61245dc45bb1815a3864c454532ed2b69c] # # add_file "util.py" # content [b7817417a6cfd134019cee7ecbce189f50f49080] # # patch "speedtest.py" # from [16e2fd46d49bca1500b688a446f7c7210198e864] # to [046bbaefe0508e1f04da57193c4abc84e73b5363] # ============================================================ --- benchmarks.py c4e1e7c4503221fd461f5796f54d99a309846df5 +++ benchmarks.py c4e1e7c4503221fd461f5796f54d99a309846df5 @@ -0,0 +1,12 @@ +class PullBenchmark(object): + def __init__(self, repo_source): + self.repo_source = repo_source + + def setup(self, vcs): + vcs.init_repo("target") + self.repo_source.setup() + + def run(self, vcs): + vcs.pull(self.repo_source.repo_path(), "target") + + ============================================================ --- driver.py 166389c362ec9c22019dc818580d9aaeed8b691c +++ driver.py 166389c362ec9c22019dc818580d9aaeed8b691c @@ -0,0 +1,65 @@ +import random +import os +import os.path +import shutil + +class Driver(object): + def __init__(self, scratch, results, testables, benchmarks, instrumenters, + debug, cache_clearer): + self.scratch = os.path.abspath(scratch) + self.results = os.path.abspath(results) + self.testables = testables + self.benchmarks = benchmarks + self.instrumenters = instrumenters + self.debug = debug + self.cache_clearer = cache_clearer + + def run(self): + startdir = os.getcwd() + for benchmark_name, benchmark_obj in self.benchmarks.iteritems(): + benchdir = os.path.join(self.scratch, benchmark_name) + + instrumenters = {} + for testable_name, testable_f in self.testables.iteritems(): + testable = testable_f.new(Instrumenter("")) + setupdir = os.path.join(benchdir, "setup-" + testable_name) + ensure_dir(setupdir) + os.chdir(setupdir) + testable.setup() + benchmark_obj.setup(testable) + + + for instrumenter_name, instrumenter_f in self.instrumenters.iteritems(): + instrumenters = {} + for testable_name in self.testables: + recorddir = os.path.join(self.results, + "%s-%s-%s" % (benchmark_name, + testable_name, + instrumenter_name)) + instrumenters[testable_name] = instrumenter_f.new(recorddir) + + for repeat in xrange(instrumenter_f.repeats): + plan = self.testables.keys() + random.shuffle(plan) + for testable_name in plan: + instrumenter = instrumenters[testable_name] + testable = self.testables[testable_name].new(instrumenter) + rundir = os.path.join(benchdir, + "run-%s-%s-%s" % (testable_name, + instrumenter_name, + repeat)) + shutil.copytree(os.path.join(benchdir, + "setup-" + testable_name), + rundir) + os.chdir(rundir) + self.cache_clearer() + benchmark_obj.run(testable) + instrumenter.flush() + if not self.debug: + os.chdir(startdir) + shutil.rmtree(rundir) + + if not self.debug: + for testable_name in self.testables.iterkeys(): + shutil.rmtree(os.path.join(benchdir, "setup-" + testable_name)) + ============================================================ --- instrumenter.py db37331dcb3b7aedee7e35b00e7ad285e394b9be +++ instrumenter.py db37331dcb3b7aedee7e35b00e7ad285e394b9be @@ -0,0 +1,96 @@ +import shutil +import os +import subprocess +import csv + +class Process(object): + # We stuff the called process into its own process group using the + # preexec_fn hack. We need to do this so that we can clean up + # everything that is spawned. In particular, if we run 'time mtn serve', + # then without the process group hack we can only kill the 'time' process, + # not the 'mtn serve' process (!). + def __init__(self, cmd): + self.popen = subprocess.Popen(cmd, preexec_fn=os.setsid) + self.end_hook = None + + # We use a negative pid to kill the group, not just the process. + # We use SIGINT, because if we send SIGTERM to time(1), it gives no + # output, but for SIGINT it does. + def kill(self): + os.kill(-self.popen.pid, 2) # SIGINT + return self.wait() + + def wait(self): + stdout, stderr = self.popen.communicate() + returncode = self.popen.wait() + result = (returncode, stdout, stderr) + if self.end_hook is not None: + self.end_hook(result) + return result + + def hook(self, hook): + self.end_hook = hook + + def __del__(self): + if self.popen is not None and self.popen.poll() is None: + os.kill(self.popen.pid, 9) + +class InstrumenterFactory(object): + def __init__(self, klass, repeats=1): + self.klass = klass + self.repeats = repeats + + def new(self, record_dir): + return self.klass(record_dir) + +class Instrumenter(object): + def __init__(self, record_dir): + pass + + def run(self, name, cmd): + # Runs the executable with the given args, and uses 'name' to record + # instrumented results under. Implemented in terms of run_bg. + return self.run_bg(name, cmd).wait() + + def run_bg(self, name, cmd): + # Same as above, but immediately returns an object that one should + # later call 'kill' or 'wait' on. One need only override this to + # override behavior. + return Process(cmd) + + def record_stat(self, name, value): + pass + + def record_file(self, name, path): + pass + + def flush(self): + pass + +class RecordingInstrumenter(Instrumenter): + def __init__(self, record_dir): + self.dir = record_dir + ensure_dir(self.dir) + self.stats = {} + + def record_stat(self, name, value): + self.stats.setdefault(name, []).append(value) + + def flush(self): + f = open(os.path.join(self.dir, "stats.csv"), "w") + w = csv.writer(f) + items = self.stats.items() + items.sort() + for key, values in items: + w.writerow([key] + values) + f.close() + + def record_file(self, name, path): + target_path = os.path.join(self.dir, name) + if os.path.exists(target_path): + raise KeyError, name + if os.path.isdir(path): + shutil.copytree(path, target_path) + else: + shutil.copy2(path, target_path) + ============================================================ --- instrumenters.py ed7cb561de98eaffea72e7e92c5abede0c73ab71 +++ instrumenters.py ed7cb561de98eaffea72e7e92c5abede0c73ab71 @@ -0,0 +1,32 @@ +import instrumenter + +class TimingInstrumenter(instrumenter.RecordingInstrumenter): + def parse_time_str(self, s): + # 1.7 -> 1.7 + # 1:20.3 -> 80.3 + if ":" in s: + minutes, seconds = s.split(":") + return 60 * int(minutes) + float(seconds) + else: + return float(s) + + def run_bg(self, name, cmd): + # We put a noticable string "DATA" at the start of the format, so that + # we can find it even if time(1) decides to print other garbage, like + # "Command exited with non-zero status 1". + my_cmd = ["time", "-f", "DATA: %U %S %E", "-o", "timings-" + name] + cmd + def timing_hook(result): + timing_file = open("timings-" + name, "r") + for line in timing_file: + if line.startswith("DATA"): + break + assert line.startswith("DATA") + junk, user, sys, wall = line.split() + self.record_stat(name + "-user-time", self.parse_time_str(user)) + self.record_stat(name + "-system-time", self.parse_time_str(sys)) + self.record_stat(name + "-wall-time", self.parse_time_str(wall)) + process = super(TimingInstrumenter, self).run_bg(name, my_cmd) + process.hook(timing_hook) + return process + + ============================================================ --- mtn.py fa50a6d6a4fa3c4cf880dc15b3aad40d212cccf4 +++ mtn.py fa50a6d6a4fa3c4cf880dc15b3aad40d212cccf4 @@ -0,0 +1,76 @@ +# VCSes all have a factory, with method: .new(instrumenter) + +class Mtn(object): + def __init__(self, path): + self.path = path + + def new(self, instrumenter): + return Mtn(self.path, instrumenter) + +# VCS objects are returned by this factory, and have a .setup() method, and so +# far, .pull(source, target) and .init_repo(path) methods. + +class MtnObj(object): + def __init__(self, path, instrumenter): + self.path = path + self.instrumenter = instrumenter + + def setup(self): + shutil.copy(self.path, "mtn-server") + shutil.copy(self.path, "mtn-client") + shutil.copy(self.path, "mtn") + f = open("server-perms.lua", "w") + f.write(""" + function get_netsync_read_permitted(pattern, identity) + return true + end + function get_netsync_write_permitted(identity) + return true + end + function get_passphrase(keyid) + return keyid + end + """) + f.close() + os.mkdir("keys") + f = open("keys/address@hidden", "w") + f.write("""[keypair address@hidden +MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC6Pz+IvvOCDDqzN9WFO/zOjL7s9dVCS+zn +s/L9jQ2kHfNWXFof4GcgmMu4DfU4sUrRz39QxDlUrxEOvmIc9z3DNuIcbFZx7UZg9DWfdmDm +vbW79bZlVMeIudAIUwa6euX163AK5hacmqJfuB5U7awQM9o3rn8JYULleAoz5QTtawIDAQAB# +MIICyTBDBgkqhkiG9w0BBQ0wNjAeBgkqhkiG9w0BBQwwEQQIvYSV8ucj9m4CAggAAgEYMBQG +CCqGSIb3DQMHBAg/BZPM2O3QfASCAoBBGkVz4E/Pr1CsIioC92eCz4qWLclhc53HgHSCEo9I +XdNCTpCs/oxOXhQ0WQCPFhYEaxU8STgZm0Yhq8WEF1QfxOPOU8nDiwMT0L7/ARruu5bTCxnW +B3kkn+XiO5GldVJhULFlrl91t83yMsTSw+vyCyxZkqewBLR7mqHQUe2suVquMyutxxr2vZgV +QMfRxk65fSvySUHeNaj1dmakYcpP+35iejyUTAtAGuBsv2C68bwif4wkpLpedghNCtmccSdQ +t9QDF3yy6Q42tAW/OK6/t836/qn39f+47Kp4LMJUMmxNrtV7IntIkgBGgnGsqP9Br2B4GYXc +sWK0YApA3+Sf3kfH/wQ6Hib8nN4YxUTxxnS9WNHvRFrXCmfbGd5vAzi4lKCm/W+2Nlpd4DDQ +3JZjjCR73PMfKtHJCGULkNkK/9kRyhLYql2u/ZUJoEcdZxzEpYgExW8Wu1CrCVtWd+ueXs1h +or6Fdua7Gg4cjMgVg6EUSxdMBFQCX8heD8JeG6jMFNR9hTxe8o/PK8Ys63JyLMLRUv3Ud+f8 +8T0TtCZV5+rgLfvb6k89uDJJK228WuJB6rp8S+qqq30RFPmkzW8JNulRilY+wrIfcowA6+TA +T5WKzFOIbkZd/R34tNLJMjTJlUq6SQKaOlQnqOEFbyY/GXgzYgnmc3tl8pigXEJvNzU5GiuB +ib35QQbzh87KlfLtWELK+8ZoyhZAZAMr97IavUbuFubOyEoEozUliARyRZ1ZudM4Ii+J6TRX +cmLryIBlz3OXgUUBSwJPwtWuR4tZ8nIt7cVJr7pxLblGfeFuu01HWN55hv4C78/aNSipVYCF +OFt8n7wQHxbbJvoTIdd/ +[end]""") + f.close() + + + def pull(self, source, target): + server = self.instrumenter.run_bg("server", + ["./mtn-server", + "--rcfile", "server-perms.lua", + "--keydir", "keys", + "-d", source, + "--bind=localhost:12345", + "serve", "*"]) + time.sleep(3) + self.instrumenter.run("pull", + ["./mtn-client", "-d", target, "pull", "localhost:12345", "*"]) + server.kill() + + def init_repo(self, repo): + self.instrumenter.run("init_repo", + ["./mtn", "db", "init", "-d", repo]) + + ============================================================ --- repo.py 6f250d61245dc45bb1815a3864c454532ed2b69c +++ repo.py 6f250d61245dc45bb1815a3864c454532ed2b69c @@ -0,0 +1,16 @@ +import os.path + +# repos have setup() and repo_path() methods. + +class ExistingRepo(object): + def __init__(self, path): + self.path = os.path.abspath(path) + + def setup(self): + pass + + def repo_path(self): + return self.path + + +# TODO: add synthetic repo generation classes ============================================================ --- util.py b7817417a6cfd134019cee7ecbce189f50f49080 +++ util.py b7817417a6cfd134019cee7ecbce189f50f49080 @@ -0,0 +1,18 @@ +import os +import os.path + +def ensure_dir(path): + if not os.path.exists(path): + os.makedirs(path) + +class CacheClearer: + def __init__(self, executable): + if not executable or not os.path.exists(executable): + print "No such path %s, not clearing caches" % executable + self.executable = None + else: + self.executable = os.path.abspath(executable) + + def __call__(self): + os.system(self.executable) + ============================================================ --- speedtest.py 16e2fd46d49bca1500b688a446f7c7210198e864 +++ speedtest.py 046bbaefe0508e1f04da57193c4abc84e73b5363 @@ -1,136 +1,9 @@ -import random import os import os.path -import subprocess + import shutil import time -import csv -def ensure_dir(path): - if not os.path.exists(path): - os.makedirs(path) - -class Process(object): - # We stuff the called process into its own process group using the - # preexec_fn hack. We need to do this so that we can clean up - # everything that is spawned. In particular, if we run 'time mtn serve', - # then without the process group hack we can only kill the 'time' process, - # not the 'mtn serve' process (!). - def __init__(self, cmd): - self.popen = subprocess.Popen(cmd, preexec_fn=os.setsid) - self.end_hook = None - - # We use a negative pid to kill the group, not just the process. - # We use SIGINT, because if we send SIGTERM to time(1), it gives no - # output, but for SIGINT it does. - def kill(self): - os.kill(-self.popen.pid, 2) # SIGINT - return self.wait() - - def wait(self): - stdout, stderr = self.popen.communicate() - returncode = self.popen.wait() - result = (returncode, stdout, stderr) - if self.end_hook is not None: - self.end_hook(result) - return result - - def hook(self, hook): - self.end_hook = hook - - def __del__(self): - if self.popen is not None and self.popen.poll() is None: - os.kill(self.popen.pid, 9) - -class InstrumenterFactory(object): - def __init__(self, klass, repeats=1): - self.klass = klass - self.repeats = repeats - - def new(self, record_dir): - return self.klass(record_dir) - -class Instrumenter(object): - def __init__(self, record_dir): - pass - - def run(self, name, cmd): - # Runs the executable with the given args, and uses 'name' to record - # instrumented results under. Implemented in terms of run_bg. - return self.run_bg(name, cmd).wait() - - def run_bg(self, name, cmd): - # Same as above, but immediately returns an object that one should - # later call 'kill' or 'wait' on. One need only override this to - # override behavior. - return Process(cmd) - - def record_stat(self, name, value): - pass - - def record_file(self, name, path): - pass - - def flush(self): - pass - -class RecordingInstrumenter(Instrumenter): - def __init__(self, record_dir): - self.dir = record_dir - ensure_dir(self.dir) - self.stats = {} - - def record_stat(self, name, value): - self.stats.setdefault(name, []).append(value) - - def flush(self): - f = open(os.path.join(self.dir, "stats.csv"), "w") - w = csv.writer(f) - items = self.stats.items() - items.sort() - for key, values in items: - w.writerow([key] + values) - f.close() - - def record_file(self, name, path): - target_path = os.path.join(self.dir, name) - if os.path.exists(target_path): - raise KeyError, name - if os.path.isdir(path): - shutil.copytree(path, target_path) - else: - shutil.copy2(path, target_path) - -class TimingInstrumenter(RecordingInstrumenter): - def parse_time_str(self, s): - # 1.7 -> 1.7 - # 1:20.3 -> 80.3 - if ":" in s: - minutes, seconds = s.split(":") - return 60 * int(minutes) + float(seconds) - else: - return float(s) - - def run_bg(self, name, cmd): - # We put a noticable string "DATA" at the start of the format, so that - # we can find it even if time(1) decides to print other garbage, like - # "Command exited with non-zero status 1". - my_cmd = ["time", "-f", "DATA: %U %S %E", "-o", "timings-" + name] + cmd - def timing_hook(result): - timing_file = open("timings-" + name, "r") - for line in timing_file: - if line.startswith("DATA"): - break - assert line.startswith("DATA") - junk, user, sys, wall = line.split() - self.record_stat(name + "-user-time", self.parse_time_str(user)) - self.record_stat(name + "-system-time", self.parse_time_str(sys)) - self.record_stat(name + "-wall-time", self.parse_time_str(wall)) - process = super(TimingInstrumenter, self).run_bg(name, my_cmd) - process.hook(timing_hook) - return process - - class Benchmark(object): def setup(self, testable): # I am called from a special directory, which my run method will @@ -155,16 +28,6 @@ def pull(self, name, repo, instrumenter): pass -class ExistingRepo(object): - def __init__(self, path): - self.path = os.path.abspath(path) - - def setup(self): - pass - - def repo_path(self): - return self.path - # each benchmark has a setup and a run # both are done in the same directory # and a directory to drop auxiliary results in @@ -203,148 +66,6 @@ # get the instrumentation to combine multiple runs together, however it # wants to do that... -class MtnFactory(object): - def __init__(self, path): - self.path = path - - def new(self, instrumenter): - return Mtn(self.path, instrumenter) - -class Mtn(object): - def __init__(self, path, instrumenter): - self.path = path - self.instrumenter = instrumenter - - def setup(self): - shutil.copy(self.path, "mtn-server") - shutil.copy(self.path, "mtn-client") - shutil.copy(self.path, "mtn") - f = open("server-perms.lua", "w") - f.write(""" - function get_netsync_read_permitted(pattern, identity) - return true - end - function get_netsync_write_permitted(identity) - return true - end - function get_passphrase(keyid) - return keyid - end - """) - f.close() - os.mkdir("keys") - f = open("keys/address@hidden", "w") - f.write("""[keypair address@hidden -MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC6Pz+IvvOCDDqzN9WFO/zOjL7s9dVCS+zn -s/L9jQ2kHfNWXFof4GcgmMu4DfU4sUrRz39QxDlUrxEOvmIc9z3DNuIcbFZx7UZg9DWfdmDm -vbW79bZlVMeIudAIUwa6euX163AK5hacmqJfuB5U7awQM9o3rn8JYULleAoz5QTtawIDAQAB# -MIICyTBDBgkqhkiG9w0BBQ0wNjAeBgkqhkiG9w0BBQwwEQQIvYSV8ucj9m4CAggAAgEYMBQG -CCqGSIb3DQMHBAg/BZPM2O3QfASCAoBBGkVz4E/Pr1CsIioC92eCz4qWLclhc53HgHSCEo9I -XdNCTpCs/oxOXhQ0WQCPFhYEaxU8STgZm0Yhq8WEF1QfxOPOU8nDiwMT0L7/ARruu5bTCxnW -B3kkn+XiO5GldVJhULFlrl91t83yMsTSw+vyCyxZkqewBLR7mqHQUe2suVquMyutxxr2vZgV -QMfRxk65fSvySUHeNaj1dmakYcpP+35iejyUTAtAGuBsv2C68bwif4wkpLpedghNCtmccSdQ -t9QDF3yy6Q42tAW/OK6/t836/qn39f+47Kp4LMJUMmxNrtV7IntIkgBGgnGsqP9Br2B4GYXc -sWK0YApA3+Sf3kfH/wQ6Hib8nN4YxUTxxnS9WNHvRFrXCmfbGd5vAzi4lKCm/W+2Nlpd4DDQ -3JZjjCR73PMfKtHJCGULkNkK/9kRyhLYql2u/ZUJoEcdZxzEpYgExW8Wu1CrCVtWd+ueXs1h -or6Fdua7Gg4cjMgVg6EUSxdMBFQCX8heD8JeG6jMFNR9hTxe8o/PK8Ys63JyLMLRUv3Ud+f8 -8T0TtCZV5+rgLfvb6k89uDJJK228WuJB6rp8S+qqq30RFPmkzW8JNulRilY+wrIfcowA6+TA -T5WKzFOIbkZd/R34tNLJMjTJlUq6SQKaOlQnqOEFbyY/GXgzYgnmc3tl8pigXEJvNzU5GiuB -ib35QQbzh87KlfLtWELK+8ZoyhZAZAMr97IavUbuFubOyEoEozUliARyRZ1ZudM4Ii+J6TRX -cmLryIBlz3OXgUUBSwJPwtWuR4tZ8nIt7cVJr7pxLblGfeFuu01HWN55hv4C78/aNSipVYCF -OFt8n7wQHxbbJvoTIdd/ -[end]""") - f.close() - - - def pull(self, source, target): - server = self.instrumenter.run_bg("server", - ["./mtn-server", - "--rcfile", "server-perms.lua", - "--keydir", "keys", - "-d", source, - "--bind=localhost:12345", - "serve", "*"]) - time.sleep(3) - self.instrumenter.run("pull", - ["./mtn-client", "-d", target, "pull", "localhost:12345", "*"]) - server.kill() - - def init_repo(self, repo): - self.instrumenter.run("init_repo", - ["./mtn", "db", "init", "-d", repo]) - -class PullBenchmark(object): - def __init__(self, repo_source): - self.repo_source = repo_source - - def setup(self, vcs): - vcs.init_repo("target") - self.repo_source.setup() - - def run(self, vcs): - vcs.pull(self.repo_source.repo_path(), "target") - - -class Driver(object): - def __init__(self, scratch, results, testables, benchmarks, instrumenters, - debug, cache_clearer): - self.scratch = os.path.abspath(scratch) - self.results = os.path.abspath(results) - self.testables = testables - self.benchmarks = benchmarks - self.instrumenters = instrumenters - self.debug = debug - self.cache_clearer = cache_clearer - - def run(self): - startdir = os.getcwd() - for benchmark_name, benchmark_obj in self.benchmarks.iteritems(): - benchdir = os.path.join(self.scratch, benchmark_name) - - instrumenters = {} - for testable_name, testable_f in self.testables.iteritems(): - testable = testable_f.new(Instrumenter("")) - setupdir = os.path.join(benchdir, "setup-" + testable_name) - ensure_dir(setupdir) - os.chdir(setupdir) - testable.setup() - benchmark_obj.setup(testable) - - - for instrumenter_name, instrumenter_f in self.instrumenters.iteritems(): - instrumenters = {} - for testable_name in self.testables: - recorddir = os.path.join(self.results, - "%s-%s-%s" % (benchmark_name, - testable_name, - instrumenter_name)) - instrumenters[testable_name] = instrumenter_f.new(recorddir) - - for repeat in xrange(instrumenter_f.repeats): - plan = self.testables.keys() - random.shuffle(plan) - for testable_name in plan: - instrumenter = instrumenters[testable_name] - testable = self.testables[testable_name].new(instrumenter) - rundir = os.path.join(benchdir, - "run-%s-%s-%s" % (testable_name, - instrumenter_name, - repeat)) - shutil.copytree(os.path.join(benchdir, - "setup-" + testable_name), - rundir) - os.chdir(rundir) - self.cache_clearer() - benchmark_obj.run(testable) - instrumenter.flush() - if not self.debug: - os.chdir(startdir) - shutil.rmtree(rundir) - - if not self.debug: - for testable_name in self.testables.iterkeys(): - shutil.rmtree(os.path.join(benchdir, "setup-" + testable_name)) - # TODO: # figure out some consistent class/object/factory naming scheme @@ -359,11 +80,10 @@ instrumenters = {"time": InstrumenterFactory(TimingInstrumenter, 2)} debug = 1 - def clear_caches(): - pass + cache_clearer = CacheClearer("./drop_caches") driver = Driver(scratch, results, testables, benchmarks, instrumenters, - debug, clear_caches) + debug, cache_clearer) driver.run() if __name__ == "__main__":